scp reader/data

IspML · Dec 22, 2017 · 9cef537 · 9cef537
1 parent 27cc7e3
commit 9cef537
Show file tree

Hide file tree

Showing 4 changed files with 369 additions and 0 deletions.
diff --git a/ortools/data/set_covering_data.cc b/ortools/data/set_covering_data.cc
@@ -0,0 +1,37 @@
+// Copyright 2010-2017 Google
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ortools/data/set_covering_data.h"
+
+namespace operations_research {
+namespace scp {
+
+// Resets the instance to an empty problem with `num_rows` rows and
+// `num_columns` columns. Every row and column starts with no entries and
+// every column cost is reset to 0.
+void ScpData::SetProblemSize(int num_rows, int num_columns) {
+  columns_per_row_.assign(num_rows, std::vector<int>());
+  rows_per_column_.assign(num_columns, std::vector<int>());
+  // assign() rather than resize(): resize() only initializes newly added
+  // elements, so costs set for a previous problem would survive the reset.
+  column_costs_.assign(num_columns, 0);
+}
+
+// Stores `cost` as the cost of column `column_id`. No bounds checking is
+// performed: `column_id` must be a valid index into column_costs_.
+void ScpData::SetColumnCost(int column_id, int cost) {
+  int& stored_cost = column_costs_[column_id];
+  stored_cost = cost;
+}
+
+// Records that column `column_id` contains row `row_id`, appending to both
+// the per-column and the per-row lists. Indices are not range-checked.
+void ScpData::AddRowInColumn(int row_id, int column_id) {
+  std::vector<int>& rows_of_column = rows_per_column_[column_id];
+  std::vector<int>& columns_of_row = columns_per_row_[row_id];
+  rows_of_column.push_back(row_id);
+  columns_of_row.push_back(column_id);
+}
+
+}  // namespace scp
+}  // namespace operations_research
diff --git a/ortools/data/set_covering_data.h b/ortools/data/set_covering_data.h
@@ -0,0 +1,54 @@
+// Copyright 2010-2017 Google
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OR_TOOLS_DATA_SET_COVERING_DATA_H_
+#define OR_TOOLS_DATA_SET_COVERING_DATA_H_
+
+#include <vector>
+
+#include "ortools/base/integral_types.h"
+
+namespace operations_research {
+namespace scp {
+
+// In-memory representation of a set covering problem instance.
+//
+// The incidence between rows and columns is stored twice, once indexed by
+// row and once indexed by column, together with one cost per column.
+class ScpData {
+ public:
+  // Getters.
+  // The sizes are those passed to the last SetProblemSize() call.
+  int num_rows() const { return static_cast<int>(columns_per_row_.size()); }
+  int num_columns() const {
+    return static_cast<int>(rows_per_column_.size());
+  }
+  // columns_per_row()[i][j] is the index of the jth column covering row i.
+  const std::vector<std::vector<int>>& columns_per_row() const {
+    return columns_per_row_;
+  }
+  // rows_per_column()[i][j] is the index of the jth row covered by column i.
+  const std::vector<std::vector<int>>& rows_per_column() const {
+    return rows_per_column_;
+  }
+  // column_costs()[i] is the cost of column i.
+  const std::vector<int>& column_costs() const { return column_costs_; }
+
+  // Builders.
+  // Calling SetProblemSize() will clear all previous data.
+  void SetProblemSize(int num_rows, int num_columns);
+  // Sets the cost of column `column_id`.
+  void SetColumnCost(int column_id, int cost);
+  // Declares that column `column` covers row `row`.
+  void AddRowInColumn(int row, int column);
+
+ private:
+  // Indexed by row: the columns covering that row.
+  std::vector<std::vector<int>> columns_per_row_;
+  // Indexed by column: the rows covered by that column.
+  std::vector<std::vector<int>> rows_per_column_;
+  // Indexed by column: the cost of that column.
+  std::vector<int> column_costs_;
+};
+
+}  // namespace scp
+}  // namespace operations_research
+
+#endif  // OR_TOOLS_DATA_SET_COVERING_DATA_H_
diff --git a/ortools/data/set_covering_parser.cc b/ortools/data/set_covering_parser.cc
@@ -0,0 +1,180 @@
+// Copyright 2010-2017 Google
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ortools/data/set_covering_parser.h"
+
+#include "ortools/base/strtoint.h"
+#include "ortools/base/numbers.h"
+#include "ortools/base/split.h"
+#include "ortools/base/filelineiter.h"
+
+namespace operations_research {
+namespace scp {
+
+using ::strings::delimiter::AnyOf;
+
+// Starts the parser in the INIT section with all counters at zero.
+ScpParser::ScpParser()
+    : section_(INIT),
+      line_(0),
+      remaining_(0),
+      current_(0) {}
+
+// Reads the problem stored in `filename`, written in `format`, into *data.
+//
+// Any previous content of *data is discarded before the file is read, so a
+// failed load never leaves an earlier problem behind. Returns true only if
+// the parser reached the END section without reporting an error.
+bool ScpParser::LoadProblem(const std::string& filename, Format format,
+                            ScpData* data) {
+  section_ = INIT;
+  line_ = 0;
+  remaining_ = 0;
+  current_ = 0;
+  // Reset up front: otherwise *data is only cleared once a valid size header
+  // has been parsed, which never happens for an empty or malformed file.
+  data->SetProblemSize(0, 0);
+
+  for (const std::string& line : FileLines(filename)) {
+    ProcessLine(line, format, data);
+    if (section_ == ERROR) return false;
+  }
+  return section_ == END;
+}
+
+// Consumes one line of the input file and advances the parsing state machine.
+//
+// The line is split on spaces, tabs, colons and carriage returns; the
+// resulting words are interpreted according to the current section_ and to
+// `format`, and the parsed values are stored into *data. Malformed input is
+// reported through LogError(), which moves the parser to the ERROR section.
+//
+// Every size, count and (1-based) index read from the file is validated here
+// before it is handed to *data, so that a corrupted file is reported as an
+// error instead of causing an out-of-range access.
+void ScpParser::ProcessLine(const std::string& line, Format format,
+                            ScpData* data) {
+  line_++;
+  const std::vector<std::string> words =
+      absl::StrSplit(line, AnyOf(" :\t\r"), absl::SkipEmpty());
+  const int num_words = static_cast<int>(words.size());
+  switch (section_) {
+    case INIT: {
+      if (num_words != 2) {
+        LogError(line, "Problem reading the size of the problem");
+        return;
+      }
+      const int num_rows = atoi32(words[0]);
+      const int num_columns = atoi32(words[1]);
+      if (num_rows < 0 || num_columns < 0) {
+        LogError(line, "Negative problem size");
+        return;
+      }
+      data->SetProblemSize(num_rows, num_columns);
+      current_ = 0;
+      switch (format) {
+        case SCP_FORMAT: {
+          // Only the SCP format starts with a separate block of costs.
+          section_ = COSTS;
+          break;
+        }
+        case RAILROAD_FORMAT:
+        case TRIPLET_FORMAT: {
+          section_ = COLUMN;
+          break;
+        }
+      }
+      break;
+    }
+    case COSTS: {
+      // Costs may span several lines; current_ is the next column to fill.
+      if (current_ + num_words > data->num_columns()) {
+        LogError(line, "Too many cost items");
+        return;
+      }
+      for (int i = 0; i < num_words; ++i) {
+        data->SetColumnCost(current_++, atoi32(words[i]));
+      }
+      if (current_ == data->num_columns()) {
+        section_ = NUM_COLUMNS_IN_ROW;
+        current_ = 0;
+      }
+      break;
+    }
+    case COLUMN: {
+      // One column per line; current_ is the index of the column being read.
+      switch (format) {
+        case SCP_FORMAT: {
+          LogError(line, "Wrong state in the loader");
+          return;
+        }
+        case RAILROAD_FORMAT: {
+          if (num_words < 2) {
+            LogError(line, "Column declaration too short");
+            return;
+          }
+          // Only reachable when the header announced zero columns.
+          if (current_ >= data->num_columns()) {
+            LogError(line, "Too many column declarations");
+            return;
+          }
+          const int cost = atoi32(words[0]);
+          const int num_items = atoi32(words[1]);
+          // Compared as num_words - 2 so that a huge or negative count read
+          // from the file can neither overflow nor be accepted.
+          if (num_items != num_words - 2) {
+            LogError(line, "Mismatch in column declaration");
+            return;
+          }
+          data->SetColumnCost(current_, cost);
+          for (int i = 0; i < num_items; ++i) {
+            const int row = atoi32(words[i + 2]);  // 1 based.
+            if (row < 1 || row > data->num_rows()) {
+              LogError(line, "Row index out of range");
+              return;
+            }
+            data->AddRowInColumn(row - 1, current_);
+          }
+          current_++;
+          if (current_ == data->num_columns()) {
+            section_ = END;
+          }
+          break;
+        }
+        case TRIPLET_FORMAT: {
+          if (num_words != 3) {
+            LogError(line, "Column declaration does not contain 3 rows");
+            return;
+          }
+          // Only reachable when the header announced zero columns.
+          if (current_ >= data->num_columns()) {
+            LogError(line, "Too many column declarations");
+            return;
+          }
+          // Columns have no explicit cost in this format; each one costs 1.
+          data->SetColumnCost(current_, 1);
+          for (int i = 0; i < 3; ++i) {
+            const int row = atoi32(words[i]);  // 1 based.
+            if (row < 1 || row > data->num_rows()) {
+              LogError(line, "Row index out of range");
+              return;
+            }
+            data->AddRowInColumn(row - 1, current_);
+          }
+          current_++;
+          if (current_ == data->num_columns()) {
+            section_ = END;
+          }
+          break;
+        }
+      }
+      break;
+    }
+    case NUM_COLUMNS_IN_ROW: {
+      if (num_words != 1) {
+        LogError(line, "The header of a row should be one number");
+        return;
+      }
+      // Only reachable when the header announced zero rows.
+      if (current_ >= data->num_rows()) {
+        LogError(line, "Too many row declarations");
+        return;
+      }
+      remaining_ = atoi32(words[0]);
+      if (remaining_ < 0) {
+        LogError(line, "Negative number of columns in a row declaration");
+        return;
+      }
+      section_ = ROW;
+      break;
+    }
+    case ROW: {
+      // The columns of a row may span several lines; remaining_ counts how
+      // many are still expected for row current_.
+      if (num_words > remaining_) {
+        LogError(line, "Too many columns in a row declaration");
+        return;
+      }
+      for (const std::string& w : words) {
+        const int column = atoi32(w);  // 1 based.
+        if (column < 1 || column > data->num_columns()) {
+          LogError(line, "Column index out of range");
+          return;
+        }
+        remaining_--;
+        data->AddRowInColumn(current_, column - 1);
+      }
+      if (remaining_ == 0) {
+        current_++;
+        if (current_ == data->num_rows()) {
+          section_ = END;
+        } else {
+          section_ = NUM_COLUMNS_IN_ROW;
+        }
+      }
+      break;
+    }
+    case END: {
+      // Anything after the last expected item is ignored.
+      break;
+    }
+    case ERROR: {
+      break;
+    }
+  }
+}
+
+void ScpParser::LogError(const std::string& line, const std::string& message) {
+  LOG(ERROR) << "Error on line " << line_ << ": " << message << "(" << line
+             << ")";
+  section_ = ERROR;
+}
+
+}  // namespace scp
+}  // namespace operations_research
diff --git a/ortools/data/set_covering_parser.h b/ortools/data/set_covering_parser.h
@@ -0,0 +1,98 @@
+// Copyright 2010-2017 Google
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OR_TOOLS_DATA_SET_COVERING_PARSER_H_
+#define OR_TOOLS_DATA_SET_COVERING_PARSER_H_
+
+#include <string>
+#include <vector>
+
+#include "ortools/base/integral_types.h"
+#include "ortools/data/set_covering_data.h"
+
+namespace operations_research {
+namespace scp {
+
+// Set covering problem.
+//
+// We have a list of subsets of a set. Each subset has a cost.  The
+// goal is to select a solution set of subsets such that (1) all elements
+// of the set belong to at least one subset of the solution set, and (2)
+// the sum of the costs of the subsets in the solution set is minimal.
+//
+// To follow the standard literature, each element is called a row, and each
+// subset is called a column.
+
+class ScpParser {
+ public:
+  enum Section {
+    INIT,  // Expecting the "<num rows> <num columns>" size line.
+    COSTS,  // Reading the column costs (SCP format only).
+    COLUMN,  // Reading one column per line (railroad and triplet formats).
+    NUM_COLUMNS_IN_ROW,  // Expecting the column count of the current row.
+    ROW,  // Reading the columns that cover the current row.
+    END,  // The last expected column or row has been read.
+    ERROR,  // Set by LogError(); LoadProblem() then returns false.
+  };
+
+  enum Format {
+    // The original scp format of these problems is:
+    //
+    // number of rows (m), number of columns (n)
+    //
+    // the cost of each column c(j),j=1,...,n
+    //
+    // for each row i (i=1,...,m): the number of columns which cover row
+    // i followed by a list of the columns which cover row i.
+    //
+    // The original problems (scp*) from the OR-LIB follow this format.
+    SCP_FORMAT,
+    // The railroad format is:
+    //   number of rows (m), number of columns (n)
+    //
+    //   for each column j (j=1,...,n): the cost of the column, the number
+    //   of rows that it covers followed by a list of the rows that it
+    //   covers.
+    //
+    // The railroad problems follow this format.
+    RAILROAD_FORMAT,
+    // The triplet format is:
+    //
+    // number of rows (m), number of columns (n)
+    //
+    // for each column, the 3 rows it contains.  Note that the cost of
+    // each column is 1.
+    //
+    // The Steiner triple covering problems follow this format.
+    TRIPLET_FORMAT
+  };
+
+  ScpParser();
+
+  // Loads `filename` into *data (resetting it); returns true on success.
+  bool LoadProblem(const std::string& filename, Format format, ScpData* data);
+
+ private:
+  void ProcessLine(const std::string& line, Format format, ScpData* data);
+  void LogError(const std::string& line, const std::string& error_message);
+
+  Section section_;  // Current state of the parsing state machine.
+  int line_;  // 1-based number of the line being processed.
+  int remaining_;  // Columns still expected for the current row (ROW).
+  int current_;  // Next cost, column or row index, depending on section_.
+};
+
+}  // namespace scp
+}  // namespace operations_research
+
+#endif  // OR_TOOLS_DATA_SET_COVERING_PARSER_H_