public class MLUtils extends Object
Modifier and Type | Class and Description |
---|---|
static class |
MLUtils.ColumnSeparatorFactory |
static class |
MLUtils.DataTypeFactory |
Constructor and Description |
---|
MLUtils() |
Modifier and Type | Method and Description |
---|---|
static String |
arrayToCsvString(String[] array,
char delimiter)
Utility method to convert a String array to CSV/TSV row string.
|
static String |
extractHeaderLine(String path,
int tenantId) |
static String |
extractTableSchema(String path,
int tenantId) |
static org.apache.spark.api.java.JavaRDD<String[]> |
filterRows(String delimiter,
String headerRow,
org.apache.spark.api.java.JavaRDD<String> lines,
List<Integer> featureIndices)
Applies the discard filter to a JavaRDD
|
static Map<String,Integer> |
generateHeaderMap(int numberOfFeatures) |
static Map<String,Integer> |
generateHeaderMap(String line,
org.apache.commons.csv.CSVFormat format) |
static String |
getDate() |
static String |
getErrorMsg(String customMessage,
Exception ex)
Format an error message.
|
static int |
getFeatureIndex(String featureName,
List<org.wso2.carbon.ml.commons.domain.Feature> features)
Retrieve the index of a feature in the dataset.
|
static int |
getFeatureIndex(String feature,
String headerRow,
String columnSeparator)
Retrieve the index of a feature in the dataset.
|
static String[] |
getFeatures(String line,
org.apache.commons.csv.CSVFormat format) |
static int |
getFeatureSize(String line,
org.apache.commons.csv.CSVFormat format) |
static String |
getFirstLine(String filePath) |
static List<Integer> |
getImputeFeatureIndices(org.wso2.carbon.ml.commons.domain.Workflow workflow,
List<Integer> newToOldIndicesList,
String imputeOption)
Retrieve the indices of features where discard row imputation is applied.
|
static SortedMap<Integer,String> |
getIncludedFeatures(org.wso2.carbon.ml.commons.domain.Workflow workflow,
int responseIndex) |
static SortedMap<Integer,String> |
getIncludedFeaturesAfterReordering(org.wso2.carbon.ml.commons.domain.Workflow workflow,
List<Integer> newToOldIndicesList,
int responseIndex) |
static <T,E> T |
getKeyByValue(Map<T,E> map,
E value)
Utility method to get key from value of a map.
|
static org.apache.spark.api.java.JavaRDD<String> |
getLinesFromDASTable(String tableName,
int tenantId,
org.apache.spark.api.java.JavaSparkContext sparkContext) |
static org.wso2.carbon.ml.commons.domain.MLDatasetVersion |
getMLDatsetVersion(int tenantId,
long datasetId,
String userName,
String name,
String version,
String targetPath) |
static Pattern |
getPatternFromDelimiter(String delimiter)
Generates a pattern to represent CSV or TSV format.
|
static Properties |
getProperties(List<org.wso2.carbon.ml.commons.domain.config.MLProperty> mlProperties)
Get Properties from a list of MLProperty |
static org.wso2.carbon.ml.commons.domain.SamplePoints |
getSample(String path,
String dataType,
int sampleSize,
boolean containsHeader,
String sourceType,
int tenantId)
Generate a random sample of the dataset using Spark.
|
static org.wso2.carbon.ml.commons.domain.SamplePoints |
getSampleFromDAS(String path,
int sampleSize,
String sourceType,
int tenantId)
Generate a random sample of the dataset using Spark.
|
static double[] |
toDoubleArray(String[] inArray) |
public static org.wso2.carbon.ml.commons.domain.SamplePoints getSample(String path, String dataType, int sampleSize, boolean containsHeader, String sourceType, int tenantId) throws MLMalformedDatasetException
MLMalformedDatasetException
public static org.wso2.carbon.ml.commons.domain.SamplePoints getSampleFromDAS(String path, int sampleSize, String sourceType, int tenantId) throws MLMalformedDatasetException
MLMalformedDatasetException
public static org.apache.spark.api.java.JavaRDD<String> getLinesFromDASTable(String tableName, int tenantId, org.apache.spark.api.java.JavaSparkContext sparkContext) throws org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsTableNotAvailableException, org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsException
org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsTableNotAvailableException
org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsException
public static String extractTableSchema(String path, int tenantId) throws org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsTableNotAvailableException, org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsException
org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsTableNotAvailableException
org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsException
public static String extractHeaderLine(String path, int tenantId) throws org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsTableNotAvailableException, org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsException
org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsTableNotAvailableException
org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsException
public static List<Integer> getImputeFeatureIndices(org.wso2.carbon.ml.commons.domain.Workflow workflow, List<Integer> newToOldIndicesList, String imputeOption)
workflow
- Machine learning workflow
imputeOption
- Impute option
public static int getFeatureIndex(String feature, String headerRow, String columnSeparator)
feature
- Feature name
headerRow
- First row (header) in the data file
columnSeparator
- Column separator character
public static int getFeatureIndex(String featureName, List<org.wso2.carbon.ml.commons.domain.Feature> features)
public static SortedMap<Integer,String> getIncludedFeaturesAfterReordering(org.wso2.carbon.ml.commons.domain.Workflow workflow, List<Integer> newToOldIndicesList, int responseIndex)
workflow
- Workflow
public static SortedMap<Integer,String> getIncludedFeatures(org.wso2.carbon.ml.commons.domain.Workflow workflow, int responseIndex)
workflow
- Workflow
public static org.wso2.carbon.ml.commons.domain.MLDatasetVersion getMLDatsetVersion(int tenantId, long datasetId, String userName, String name, String version, String targetPath)
tenantId
- Tenant ID of the current user
datasetId
- ID of the dataset
userName
- Name of the current user
name
- Dataset name
version
- Dataset version
targetPath
- Path of the stored data set
public static String getDate()
public static Properties getProperties(List<org.wso2.carbon.ml.commons.domain.config.MLProperty> mlProperties)
Get Properties from a list of MLProperty
mlProperties
- list of MLProperty
Properties
public static double[] toDoubleArray(String[] inArray)
inArray
- String array
public static Map<String,Integer> generateHeaderMap(String line, org.apache.commons.csv.CSVFormat format)
public static int getFeatureSize(String line, org.apache.commons.csv.CSVFormat format)
public static String[] getFeatures(String line, org.apache.commons.csv.CSVFormat format)
public static org.apache.spark.api.java.JavaRDD<String[]> filterRows(String delimiter, String headerRow, org.apache.spark.api.java.JavaRDD<String> lines, List<Integer> featureIndices)
delimiter
- Column separator of the dataset
headerRow
- Header row
lines
- JavaRDD which contains the dataset
featureIndices
- Indices of the features to apply the filter to
public static String getErrorMsg(String customMessage, Exception ex)
public static <T,E> T getKeyByValue(Map<T,E> map, E value)
map
- Map to be searched for a key
value
- Value of the key
public static String arrayToCsvString(String[] array, char delimiter)
array
- String array to be converted
delimiter
- Delimiter to be used (comma for CSV, tab for TSV)
Copyright © 2015 WSO2, Inc. All Rights Reserved.