forked from igvteam/igv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDataUtils.java
156 lines (123 loc) · 4.53 KB
/
DataUtils.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/*
* Copyright (c) 2007-2011 by The Broad Institute of MIT and Harvard. All Rights Reserved.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*
* THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR
* WARRANTIES OF ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING,
* WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
* PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER
* OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR RESPECTIVE
* TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES
* OF ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES,
* ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER
* THE BROAD OR MIT SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT
* SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*/
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.broad.igv.data;
//~--- JDK imports ------------------------------------------------------------
import java.io.*;
/**
 * Static helpers for searching sorted {@code int} arrays and for cheaply
 * estimating the size of tab-delimited ascii data files.
 *
 * @author jrobinso
 */
public class DataUtils {

    /** Maximum number of data lines sampled by {@link #estimateFileMetrics(String)}. */
    private static final int MAX_SAMPLE_LINES = 100;

    /**
     * Searches the whole of {@code values} for the position at or just before {@code x}.
     * Equivalent to {@code getIndexBefore(values, x, 0, values.length)}.
     *
     * @param values array to search; expected to be sorted in ascending order
     * @param x      value to locate
     * @return see {@link #getIndexBefore(int[], int, int, int)}
     */
    public static int getIndexBefore(int[] values, int x) {
        return getIndexBefore(values, x, 0, values.length);
    }

    /**
     * Bisects {@code values} between {@code leftBound} and {@code rightBound} looking for
     * an index {@code i} with {@code values[i] <= x} whose successor, if there is one,
     * satisfies {@code values[i + 1] >= x}.
     * <p>
     * When {@code x} occurs in the array the result is either the index of a matching
     * element or the index immediately before one, depending on where the bisection
     * lands. When the search reaches index 0 (empty array, single element, or {@code x}
     * smaller than every element) 0 is returned without inspecting {@code values[0]}.
     * <p>
     * The array is expected to be sorted ascending; results for unsorted input are
     * unspecified, but the search always terminates.
     *
     * @param values     array to search; expected to be sorted in ascending order
     * @param x          value to locate
     * @param leftBound  lower search bound (the one-argument overload passes 0)
     * @param rightBound upper search bound (the one-argument overload passes
     *                   {@code values.length})
     * @return the index found as described above
     */
    public static int getIndexBefore(int[] values, int x, int leftBound, int rightBound) {
        final int last = values.length - 1;
        int lo = leftBound;
        int hi = rightBound;

        while (true) {
            // Unsigned shift keeps the midpoint valid even if lo + hi overflows int.
            int idx = (lo + hi) >>> 1;

            // Nothing lies before index 0, so the search is clamped there.
            if (idx == 0) {
                return idx;
            }

            int current = values[idx];
            if (current == x) {
                return idx;
            }

            if (current < x) {
                // idx is the answer when it is the last element, when the next element
                // is not below x, or when the window can no longer move right.
                if (idx == last || values[idx + 1] >= x || idx == lo) {
                    return idx;
                }
                lo = idx;
            } else {
                // current > x. idx >= 1 here, so idx - 1 is a valid index. The last
                // element is compared like any other, so a two-element array no longer
                // returns index 1 unconditionally.
                if (values[idx - 1] <= x || idx == hi) {
                    return idx - 1;
                }
                hi = idx;
            }
        }
    }

    /**
     * Estimate the number of rows in an ascii data file. Estimate is based on
     * up to the first 100 lines after the first line, and assumes the line length
     * is approximately constant.
     * <p>
     * The first line of the file is read but not included in the line-length sample.
     * The column count is the number of tab-separated fields in the last line read
     * (which is the first line when the file has no further lines, and 0 for an empty
     * file). Line terminators are not included in the characters-per-line figure.
     * When no characters were sampled the estimated row count is 0.
     *
     * @param textFile path of the file to examine
     * @return the estimated metrics, or {@code null} if the file could not be opened
     *         or read (the stack trace is printed to standard error)
     */
    public static AsciiFileMetrics estimateFileMetrics(String textFile) {
        File file = new File(textFile);
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(file));

            // The first line is consumed without being sampled.
            String lastLine = reader.readLine();

            double lineCount = 0;
            double nChars = 0;
            String nextLine;
            while ((nextLine = reader.readLine()) != null) {
                // Remember the most recent real line; at end of file nextLine is null
                // and must not be used for the column count.
                lastLine = nextLine;
                if (lineCount >= MAX_SAMPLE_LINES) {
                    break;
                }
                nChars += nextLine.length();
                lineCount++;
            }

            int columnCount = (lastLine == null) ? 0 : lastLine.split("\t").length;
            double charsPerLine = (lineCount > 0) ? nChars / lineCount : 0;

            // Dividing by zero would yield Infinity, which casts to Integer.MAX_VALUE.
            int estRowCount = (charsPerLine > 0) ? (int) (file.length() / charsPerLine) : 0;

            return new AsciiFileMetrics(estRowCount, columnCount, charsPerLine);
        } catch (IOException ex) {
            // Also covers FileNotFoundException. Best effort: report and return null.
            ex.printStackTrace();
            return null;
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
    }

    /**
     * Computes a preprocessing-time estimate from file metrics as
     * {@code 8 + (int) (0.0036 * estRowCount * columnCount / 100)}.
     * The unit of the result is not evident from this class.
     *
     * @param fileMetrics metrics as produced by {@link #estimateFileMetrics(String)};
     *                    must not be {@code null}
     * @return the estimate, never less than 8 for non-negative counts
     */
    public static int estimatePreprocessingTime(AsciiFileMetrics fileMetrics) {
        return 8 + (int) ((0.0036 * fileMetrics.getEstRowCount() * fileMetrics.getColumnCount()) / 100);
    }

    /**
     * This class has some useful metrics for optimizing reading of large ascii files
     *
     * @author jrobinso
     */
    public static class AsciiFileMetrics {

        private final int estRowCount;
        private final int columnCount;
        private final double estBytesPerLine;

        /**
         * @param estRowCount     estimated number of rows in the file
         * @param columnCount     number of columns
         * @param estBytesPerLine estimated length of one line
         */
        public AsciiFileMetrics(int estRowCount, int columnCount, double estBytesPerLine) {
            this.estRowCount = estRowCount;
            this.columnCount = columnCount;
            this.estBytesPerLine = estBytesPerLine;
        }

        /** @return the estimated length of one line */
        public double getEstBytesPerLine() {
            return estBytesPerLine;
        }

        /** @return the estimated number of rows */
        public int getEstRowCount() {
            return estRowCount;
        }

        /** @return the number of columns */
        public int getColumnCount() {
            return columnCount;
        }
    }
}