From 4fac2630a6951da4f1fdd6880a26b41c27c1ff9d Mon Sep 17 00:00:00 2001 From: turingfly Date: Fri, 28 Jul 2017 19:39:49 -0400 Subject: [PATCH] Sorting and Searching --- .../GroupAnagrams.java | 75 +++++++++++++++++++ .../SearchInRotatedArray.java | 46 ++++++++++++ .../SortBigFile.java | 62 +++++++++++++++ .../SortedMerge.java | 28 +++++++ .../SortedSearchNoSize.java | 64 ++++++++++++++++ .../SparseSearch.java | 53 +++++++++++++ 6 files changed, 328 insertions(+) create mode 100644 src/chapter10SortingAndSearching/GroupAnagrams.java create mode 100644 src/chapter10SortingAndSearching/SearchInRotatedArray.java create mode 100644 src/chapter10SortingAndSearching/SortBigFile.java create mode 100644 src/chapter10SortingAndSearching/SortedMerge.java create mode 100644 src/chapter10SortingAndSearching/SortedSearchNoSize.java create mode 100644 src/chapter10SortingAndSearching/SparseSearch.java diff --git a/src/chapter10SortingAndSearching/GroupAnagrams.java b/src/chapter10SortingAndSearching/GroupAnagrams.java new file mode 100644 index 0000000..b825b82 --- /dev/null +++ b/src/chapter10SortingAndSearching/GroupAnagrams.java @@ -0,0 +1,75 @@ +package chapter10SortingAndSearching; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List;;; + +/** + * + * Problem: Write a method to sort an array of strings so that all the anagrams + * are next to each other. 
/**
 * Problem: Write a method to sort an array of strings so that all the anagrams
 * are next to each other.
 *
 * Two strings are anagrams exactly when their characters, once sorted, form the
 * same string; both methods below use that sorted form as the grouping key.
 */
public class GroupAnagrams {
    /**
     * Method 1: Sort the array with a comparator that orders strings by their
     * sorted-character key, so equal keys (anagrams) end up adjacent.
     *
     * Time Complexity: O(N log N) comparisons, each costing O(L log L) for
     * strings of length L.
     *
     * @param strs the strings to rearrange; sorted in place, must not be null
     * @return the same array instance, now ordered by anagram key
     */
    public String[] sort1(String[] strs) {
        Comparator<String> byAnagramKey = new Comparator<String>() {
            @Override
            public int compare(String s1, String s2) {
                return sortChars2(s1).compareTo(sortChars2(s2));
            }
        };
        Arrays.sort(strs, byAnagramKey);
        return strs;
    }

    /**
     * Method 2: Use a hash table which maps from the sorted version of a word
     * to the list of its anagrams, then write the groups back into the array
     * (the idea of bucket sort).
     *
     * Groups are emitted in the map's iteration order, so only adjacency of
     * anagrams is guaranteed, not any particular order between groups. Within
     * a group the original relative order is kept.
     *
     * @param strs the strings to rearrange; overwritten in place, must not be null
     * @return the same array instance with anagrams grouped together
     */
    public String[] sort2(String[] strs) {
        HashMap<String, List<String>> groups = new HashMap<>();
        for (String str : strs) {
            String key = sortChars2(str);
            List<String> group = groups.get(key);
            if (group == null) {
                group = new ArrayList<>();
                groups.put(key, group);
            }
            group.add(str);
        }
        // Put the groups back into the array, one after another.
        int index = 0;
        for (List<String> group : groups.values()) {
            for (String s : group) {
                strs[index++] = s;
            }
        }
        return strs;
    }

    /** Returns the characters of {@code s} in sorted order; equal for all anagrams of s. */
    private String sortChars2(String s) {
        char[] chars = s.toCharArray();
        Arrays.sort(chars);
        return new String(chars);
    }

    public static void main(String[] args) {
        GroupAnagrams g = new GroupAnagrams();
        String[] strs = { "ab", "ba", "aad", "dda", "add", "ada" };
        System.out.println(Arrays.toString(g.sort2(strs)));
    }
}
/**
 * Problem: Suppose an array sorted in ascending order is rotated at some pivot
 * unknown to you beforehand (i.e., 0 1 2 4 5 6 7 might become 4 5 6 7 0 1 2).
 *
 * You are given a target value to search. If found in the array return its
 * index, otherwise return -1.
 *
 * You may assume no duplicate exists in the array.
 */
public class SearchInRotatedArray {
    /**
     * Binary search over a rotated ascending array without duplicates.
     *
     * @param nums   the rotated array; null or empty yields -1
     * @param target the value to look for
     * @return the index of {@code target}, or -1 when it is absent
     */
    public int search(int[] nums, int target) {
        if (nums == null || nums.length == 0) {
            return -1;
        }
        int lo = 0;
        int hi = nums.length - 1;
        // Shrink the window until at most two candidates remain.
        while (hi - lo > 1) {
            final int middle = lo + (hi - lo) / 2;
            final int probe = nums[middle];
            if (probe == target) {
                return middle;
            }
            final boolean keepRightPart;
            if (probe < nums[hi]) {
                // Ascending run from middle to hi: stay only if target lies in it.
                keepRightPart = probe < target && target <= nums[hi];
            } else {
                // Ascending run from lo to middle: leave it unless target lies in it.
                keepRightPart = !(target < probe && nums[lo] <= target);
            }
            if (keepRightPart) {
                lo = middle;
            } else {
                hi = middle;
            }
        }
        if (nums[lo] == target) {
            return lo;
        }
        if (nums[hi] == target) {
            return hi;
        }
        return -1;
    }
}
If the buffer is full, write the buffer to disk as part of the output file, then + * clear the buffer. + * + * 6. Repeat steps 4-5 until all N chunks and the buffer are empty. + * + * At the end, we have output that is fully sorted on the disk. This algorithm + * is known as external sort. + * + * One example of external sorting is the external merge sort algorithm, which + * sorts chunks that each fit in RAM, then merges the sorted chunks + * together. For example, for sorting 900 megabytes of data using only 100 + * megabytes of RAM: + * + * 1. Read 100 MB of the data into main memory and sort by some conventional + * method, like quicksort. + * + * 2. Write the sorted data to disk. + * + * 3. Repeat steps 1 and 2 until all of the data is in sorted 100 MB chunks + * (there are 900MB / 100MB = 9 chunks), which now need to be merged into one + * single output file. + * + * 4. Read the first 10 MB (= 100MB / (9 chunks + 1)) of each sorted chunk into + * input buffers in main memory and allocate the remaining 10 MB for an output + * buffer. (In practice, it might provide better performance to make the output + * buffer larger and the input buffers slightly smaller.) + * + * 5. Perform a 9-way merge and store the result in the output buffer. Whenever + * the output buffer fills, write it to the final sorted file and empty it. + * Whenever any of the 9 input buffers empties, fill it with the next 10 MB of + * its associated 100 MB sorted chunk until no more data from the chunk is + * available. This is the key step that makes external merge sort work + * externally -- because the merge algorithm only makes one pass sequentially + * through each of the chunks, each chunk does not have to be loaded completely; + * rather, sequential parts of the chunk can be loaded as needed. 
+ * + */ +public class SortBigFile { + +} diff --git a/src/chapter10SortingAndSearching/SortedMerge.java b/src/chapter10SortingAndSearching/SortedMerge.java new file mode 100644 index 0000000..8b154af --- /dev/null +++ b/src/chapter10SortingAndSearching/SortedMerge.java @@ -0,0 +1,28 @@ +package chapter10SortingAndSearching; + +/** + * + * Problem: You are given two sorted arrays, A and B, where A has a large enough + * buffer at the end to hold B. Write a method to merge B into A in sorted + * order. + * + * Solution: Shift the existing elements backwards to make room for it. + * + */ +public class SortedMerge { + public void merge(int[] nums1, int[] nums2) { + int index1 = nums1.length - 1; + int index2 = nums2.length - 1; + int mergedIndex = index1 + index2 + 1; + while (index2 >= 0 && index1 >= 0) { + if (nums1[index1] > nums2[index2]) { + nums2[mergedIndex] = nums1[index1]; + index1--; + } else { + nums2[mergedIndex] = nums1[index2]; + index2--; + } + mergedIndex--; + } + } +} diff --git a/src/chapter10SortingAndSearching/SortedSearchNoSize.java b/src/chapter10SortingAndSearching/SortedSearchNoSize.java new file mode 100644 index 0000000..1f13f34 --- /dev/null +++ b/src/chapter10SortingAndSearching/SortedSearchNoSize.java @@ -0,0 +1,64 @@ +package chapter10SortingAndSearching; + +/** + * + * Problem: You are given an array-like data structure Listy which lacks a size + * method. It does however, have an elementAt(i) method that returns the element + * at index i in O(1) time. If i is beyond the bounds of the data structure, it + * return -1 (for this reason, the data structure only supports positive + * integers).Given a Listy which contains sorted, positive integers, find the + * index at which an element x occurs. If x occurs multiple times, you may + * return any index. 
/**
 * Problem: You are given an array-like data structure Listy which lacks a size
 * method. It does however, have an elementAt(i) method that returns the element
 * at index i in O(1) time. If i is beyond the bounds of the data structure, it
 * returns -1 (for this reason, the data structure only supports positive
 * integers). Given a Listy which contains sorted, positive integers, find the
 * index at which an element x occurs. If x occurs multiple times, you may
 * return any index.
 *
 * Time Complexity: O(logN): an upper bound on the position is found in O(logN)
 * by doubling the index, then the range is binary-searched in O(logN).
 */
public class SortedSearchNoSize {
    /**
     * Finds an index of {@code val} in {@code list}.
     *
     * @param list sorted Listy of non-negative values
     * @param val  value to look for
     * @return an index holding {@code val}, or -1 if it is absent
     */
    public static int search(Listy list, int val) {
        if (val < 0) {
            // -1 is the out-of-bounds sentinel, so negative values can never be stored.
            return -1;
        }
        int low = 0;
        int high = 1;
        while (true) {
            int probe = list.elementAt(high);
            if (probe == -1 || probe >= val) {
                break;
            }
            low = high;
            if (high > Integer.MAX_VALUE / 2) {
                // Doubling would overflow; clamp and let the binary search finish.
                high = Integer.MAX_VALUE;
                break;
            }
            high *= 2;
        }
        return binarySearch(list, val, low, high);
    }

    /**
     * Binary search for {@code val} between {@code left} and {@code right}
     * (both inclusive). {@code right} may lie beyond the end of the list.
     *
     * @return an index holding {@code val}, or -1 if it is absent from the range
     */
    public static int binarySearch(Listy list, int val, int left, int right) {
        if (val < 0) {
            return -1;
        }
        while (left + 1 < right) {
            int mid = left + (right - left) / 2;
            int probe = list.elementAt(mid);
            if (probe == val) {
                return mid;
            }
            // A probe past the end reads as -1 and must count as "too far right".
            if (probe == -1 || probe > val) {
                right = mid;
            } else {
                left = mid;
            }
        }
        if (list.elementAt(left) == val) {
            return left;
        }
        if (list.elementAt(right) == val) {
            return right;
        }
        return -1;
    }

    public static void main(String[] args) {
        int[] array = { 1, 2, 3, 4, 5, 10, 15 };
        Listy list = new Listy(array);
        System.out.println(search(list, 2));
    }
}

/** Array-backed list that hides its size and answers -1 for any index outside its bounds. */
class Listy {
    int[] array;

    public Listy(int[] arr) {
        array = arr.clone();
    }

    /** Returns the element at {@code index}, or -1 when {@code index} is out of bounds. */
    public int elementAt(int index) {
        if (index < 0 || index >= array.length) {
            return -1;
        }
        return array[index];
    }
}
/**
 * Problem: Given a sorted array of String that is interspersed with empty
 * strings, write a method to find the location of a given string.
 *
 * Example: Input "ball", {"", "", "", "", "ball", "", "", "car"}, output 4
 *
 * Time Complexity: O(logN) average, O(N) worst case.
 */
public class SparseSearch {
    /**
     * Binary search that skips over empty slots.
     *
     * @param strings sorted array interspersed with empty strings; null entries
     *                are treated like empty strings
     * @param str     the string to locate
     * @return the index of {@code str}, or -1 if it is absent, or if
     *         {@code strings} is null or {@code str} is null or empty
     */
    public static int searchI(String[] strings, String str) {
        if (strings == null || str == null || str.isEmpty()) {
            return -1;
        }
        int left = 0;
        int right = strings.length - 1;
        while (left <= right) {
            // Overflow-safe midpoint, moved to the closest non-empty slot.
            int mid = closestNonEmpty(strings, left + (right - left) / 2, left, right);
            if (mid < 0) {
                // Nothing but empty slots left in [left, right].
                return -1;
            }
            int res = strings[mid].compareTo(str);
            if (res == 0) {
                return mid;
            } else if (res < 0) {
                left = mid + 1;
            } else {
                right = mid - 1;
            }
        }
        return -1;
    }

    /**
     * Returns the index of the non-empty slot nearest to {@code mid} inside
     * [left, right], preferring the right neighbour on ties, or -1 if every
     * slot in the range is empty.
     */
    private static int closestNonEmpty(String[] strings, int mid, int left, int right) {
        if (!isEmptySlot(strings[mid])) {
            return mid;
        }
        int lower = mid - 1;
        int upper = mid + 1;
        while (lower >= left || upper <= right) {
            if (upper <= right && !isEmptySlot(strings[upper])) {
                return upper;
            }
            if (lower >= left && !isEmptySlot(strings[lower])) {
                return lower;
            }
            upper++;
            lower--;
        }
        return -1;
    }

    /** A slot carries no value when it is null or the empty string. */
    private static boolean isEmptySlot(String s) {
        return s == null || s.isEmpty();
    }

    public static void main(String[] args) {
        String[] stringList = { "", "", "", "", "ball", "", "", "car" };
        System.out.println(searchI(stringList, "ball"));
    }
}