BioFSharp


Sample: Clustering iris data set

*

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
#load "FSharpML.fsx"


open System;
open Microsoft.ML
open Microsoft.ML.Data
open FSharpML
open FSharpML.Data
open FSharpML.EstimatorModel
open FSharpML.TransformerModel


/// One row of the Iris data set: the numeric label plus the four flower
/// measurements. Used as the row type when the data view is enumerated.
/// CLIMutable gives the record a default constructor and settable
/// properties so that it can be created and populated via reflection.
[<CLIMutable>]
type IrisData =
    {
        Label: float32
        SepalLength: float32
        SepalWidth: float32
        PetalLength: float32
        PetalWidth: float32
    }




// Single MLContext shared by every component below.
// The seed is fixed (any constant works) so that runs are deterministic.
let mlContext = MLContext(seed = Nullable<int>(1))

// STEP 1: Common data loading configuration
// Loads "data/iris-full.txt" (located relative to this script's directory)
// through the text loader and wraps the resulting data view in a DataModel.
let fullData =
    // Loader settings: the first line is treated as a header and fields are
    // split on tab characters.
    let hasHeader = true
    let separatorChar = '\t'
    // Column layout handed to the loader: name, kind and source column index.
    // The names match the fields of IrisData.
    let columns =
        [|
            TextLoader.Column("Label", DataKind.Single, 0)
            TextLoader.Column("SepalLength", DataKind.Single, 1)
            TextLoader.Column("SepalWidth", DataKind.Single, 2)
            TextLoader.Column("PetalLength", DataKind.Single, 3)
            TextLoader.Column("PetalWidth", DataKind.Single, 4)
        |]

    // Build the path with Path.Combine instead of string concatenation:
    // __SOURCE_DIRECTORY__ carries no trailing separator, so appending
    // "./data/iris-full.txt" produced "<dir>./data/iris-full.txt", which is
    // not a valid path on platforms that do not ignore a trailing '.' in a
    // path segment.
    System.IO.Path.Combine(__SOURCE_DIRECTORY__, "data", "iris-full.txt")
    |> Data.loadFromTextFile mlContext separatorChar hasHeader columns
    |> DataModel.ofDataview<string> mlContext


// (Optional) Peek at the data: sepal length against sepal width, drawn as
// one point series per label value and combined into a single chart.
let plot1 =
    // Second argument is reuseRowObject; false requests a fresh IrisData per row.
    let rows = mlContext.Data.CreateEnumerable<IrisData>(fullData.Dataview, false)
    rows
    |> Seq.groupBy (fun row -> row.Label)
    |> Seq.map (fun (label, group) ->
        let sepalLengths = group |> Seq.map (fun row -> row.SepalLength)
        let sepalWidths = group |> Seq.map (fun row -> row.SepalWidth)
        Chart.Point(sepalLengths, sepalWidths, Name = sprintf "Label: %.0f" label))
    |> Chart.Combine
No value has been returned
 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
// Split the data set in two parts: training data (80%) and test data (20%).
// The first argument of trainTestSplit is the test fraction.
let trainingData, testingData = DataModel.trainTestSplit 0.2 fullData



// STEP 2: Process data, create and train the model
// Pipeline: concatenate the four measurements into the features column,
// append a KMeans trainer with three clusters, then fit on the training split.
let model =
    // Data transformation: gather the four measurement columns into one
    // features column.
    let concatFeatures (mlc: MLContext) =
        mlc.Transforms.Concatenate(
            DefaultColumnNames.Features,
            "SepalLength",
            "SepalWidth",
            "PetalLength",
            "PetalWidth")

    // Trainer: KMeans reading the features column, with 3 clusters.
    let kMeansTrainer (mlc: MLContext) =
        mlc.Clustering.Trainers.KMeans(
            featureColumnName = DefaultColumnNames.Features,
            numberOfClusters = 3)

    EstimatorModel.create mlContext
    |> EstimatorModel.appendBy concatFeatures
    |> EstimatorModel.appendBy kMeansTrainer
    // Train the model
    |> EstimatorModel.fit trainingData.Dataview

// STEP 3: Run the prediction on the test data
// Applies the trained model to the test split and yields the transformed data view.
let predictions = TransformerModel.transform testingData.Dataview model

// STEP 4: Evaluate the trained model on the test data
// evaluateWith is configured with the score and features column names and
// returns a function taking the data view and the model.
let metrics =
    let evaluate =
        Evaluation.Clustering.evaluateWith(
            Score = DefaultColumnNames.Score,
            Features = DefaultColumnNames.Features)
    evaluate testingData.Dataview model
namespace FSharp
namespace FSharp.Plotly
namespace System
namespace Microsoft
namespace Microsoft.ML
namespace Microsoft.ML.Data
module FSharpML
module Data

from FSharpML
namespace FSharpML.EstimatorModel
namespace FSharpML.TransformerModel
type IrisData =
  { Label: obj
    SepalLength: obj
    SepalWidth: obj
    PetalLength: obj
    PetalWidth: obj }


 Describes Iris flower. Used as an input to prediction function.
IrisData.Label: Microsoft.FSharp.Core.obj
IrisData.SepalLength: Microsoft.FSharp.Core.obj
IrisData.SepalWidth: Microsoft.FSharp.Core.obj
IrisData.PetalLength: Microsoft.FSharp.Core.obj
IrisData.PetalWidth: Microsoft.FSharp.Core.obj
val mlContext : MLContext
Multiple items
type MLContext =
  new : ?seed:Nullable<int> -> MLContext
  member AnomalyDetection : AnomalyDetectionCatalog
  member BinaryClassification : BinaryClassificationCatalog
  member Clustering : ClusteringCatalog
  member ComponentCatalog : ComponentCatalog
  member Data : DataOperationsCatalog
  member Forecasting : ForecastingCatalog
  member Model : ModelOperationsCatalog
  member MulticlassClassification : MulticlassClassificationCatalog
  member Ranking : RankingCatalog
  ...

--------------------
MLContext(?seed: Nullable<Microsoft.FSharp.Core.int>) : MLContext
Multiple items
type Nullable =
  static member Compare<'T> : n1:Nullable<'T> * n2:Nullable<'T> -> int
  static member Equals<'T> : n1:Nullable<'T> * n2:Nullable<'T> -> bool
  static member GetUnderlyingType : nullableType:Type -> Type

--------------------
type Nullable<'T (requires default constructor and value type and 'T :> ValueType)> =
  struct
    new : value:'T -> Nullable<'T>
    member Equals : other:obj -> bool
    member GetHashCode : unit -> int
    member GetValueOrDefault : unit -> 'T + 1 overload
    member HasValue : bool
    member ToString : unit -> string
    member Value : 'T
  end

--------------------
Nullable ()
Nullable(value: 'T) : Nullable<'T>
val fullData : Microsoft.FSharp.Core.obj
val hasHeader : Microsoft.FSharp.Core.bool
val separatorChar : Microsoft.FSharp.Core.char
val columns : TextLoader.Column Microsoft.FSharp.Core.[]
type TextLoader =
  member GetOutputSchema : unit -> DataViewSchema
  member Load : source:IMultiStreamSource -> IDataView
  nested type Column
  nested type Options
  nested type Range
type Column =
  new : unit -> Column + 3 overloads
  val Name : string
  val Source : Range[]
  val KeyCount : KeyCount
  member DataKind : DataKind with get, set
type DataKind =
  | SByte = 1uy
  | Byte = 2uy
  | Int16 = 3uy
  | UInt16 = 4uy
  | Int32 = 5uy
  | UInt32 = 6uy
  | Int64 = 7uy
  | UInt64 = 8uy
  | Single = 9uy
  | Double = 10uy
  ...
field DataKind.Single: DataKind = 9uy
Multiple items
module Data

from FSharpML

--------------------
namespace Microsoft.ML.Data

--------------------
namespace System.Data
val loadFromTextFile : mlc:MLContext -> separatorChar:Microsoft.FSharp.Core.char -> hasHeader:Microsoft.FSharp.Core.bool -> columns:TextLoader.Column Microsoft.FSharp.Core.[] -> path:Microsoft.FSharp.Core.string -> IDataView
module DataModel

from FSharpML
val ofDataview<'info> : mlc:MLContext -> dataview:IDataView -> DataModel.DataModel<Microsoft.FSharp.Core.obj>
val plot1 : '_arg3
property MLContext.Data: DataOperationsCatalog with get
DataOperationsCatalog.CreateEnumerable<'TRow (requires default constructor and reference type)>(data: IDataView, reuseRowObject: Microsoft.FSharp.Core.bool,?ignoreMissingColumns: Microsoft.FSharp.Core.bool,?schemaDefinition: SchemaDefinition) : Collections.Generic.IEnumerable<'TRow>
module Seq

from FSharp.Plotly
type Chart =
  static member Area : xy:seq<#IConvertible * #IConvertible> * ?Name:string * ?ShowMarkers:bool * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Dash:DrawingStyle * ?Width:'a4 -> GenericChart
  static member Area : x:seq<#IConvertible> * y:seq<#IConvertible> * ?Name:string * ?ShowMarkers:bool * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Dash:DrawingStyle * ?Width:'a4 -> GenericChart
  static member Bar : keysvalues:seq<#IConvertible * #IConvertible> * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Marker:Marker -> GenericChart
  static member Bar : keys:seq<#IConvertible> * values:seq<#IConvertible> * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Marker:Marker -> GenericChart
  static member BoxPlot : xy:seq<'a0 * 'a1> * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Fillcolor:'a3 * ?Opacity:float * ?Whiskerwidth:'a4 * ?Boxpoints:Boxpoints * ?Boxmean:BoxMean * ?Jitter:'a5 * ?Pointpos:'a6 * ?Orientation:Orientation -> GenericChart
  static member BoxPlot : ?x:'a0 * ?y:'a1 * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Fillcolor:'a3 * ?Opacity:float * ?Whiskerwidth:'a4 * ?Boxpoints:Boxpoints * ?Boxmean:BoxMean * ?Jitter:'a5 * ?Pointpos:'a6 * ?Orientation:Orientation -> GenericChart
  static member Bubble : xysizes:seq<#IConvertible * #IConvertible * #IConvertible> * ?Name:string * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:'a3 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font -> GenericChart
  static member Bubble : x:seq<#IConvertible> * y:seq<#IConvertible> * sizes:seq<#IConvertible> * ?Name:string * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:'a3 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font -> GenericChart
  static member ChoroplethMap : locations:seq<string> * z:seq<#IConvertible> * ?Text:seq<#IConvertible> * ?Locationmode:LocationFormat * ?Autocolorscale:bool * ?Colorscale:Colorscale * ?Colorbar:'a2 * ?Marker:Marker * ?Zmin:'a3 * ?Zmax:'a4 -> GenericChart
  static member Column : keysvalues:seq<#IConvertible * #IConvertible> * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Marker:Marker -> GenericChart
  ...
static member Chart.Point : xy:Microsoft.FSharp.Collections.seq<#IConvertible * #IConvertible> * ?Name:Microsoft.FSharp.Core.string * ?Showlegend:Microsoft.FSharp.Core.bool * ?MarkerSymbol:StyleParam.Symbol * ?Color:'a2 * ?Opacity:Microsoft.FSharp.Core.float * ?Labels:Microsoft.FSharp.Collections.seq<#IConvertible> * ?TextPosition:StyleParam.TextPosition * ?TextFont:Font -> GenericChart.GenericChart
static member Chart.Point : x:Microsoft.FSharp.Collections.seq<#IConvertible> * y:Microsoft.FSharp.Collections.seq<#IConvertible> * ?Name:Microsoft.FSharp.Core.string * ?Showlegend:Microsoft.FSharp.Core.bool * ?MarkerSymbol:StyleParam.Symbol * ?Color:'a2 * ?Opacity:Microsoft.FSharp.Core.float * ?Labels:Microsoft.FSharp.Collections.seq<#IConvertible> * ?TextPosition:StyleParam.TextPosition * ?TextFont:Font -> GenericChart.GenericChart
static member Chart.Combine : gCharts:Microsoft.FSharp.Collections.seq<GenericChart.GenericChart> -> GenericChart.GenericChart
val trainingData : 'a
val testingData : 'a
val trainTestSplit : testfraction:Microsoft.FSharp.Core.float -> dataModel:DataModel.DataModel<'a> -> DataModel.DataModel<DataModel.TrainTestSplitInfo> * DataModel.DataModel<DataModel.TrainTestSplitInfo>
val model : '_arg3 (requires member ( |> ) and member ( |> ) and 'a :> ITransformer and reference type and 'c :> ITransformer and reference type)
Multiple items
module EstimatorModel

from FSharpML.EstimatorModel

--------------------
namespace FSharpML.EstimatorModel

--------------------
type EstimatorModel<'a (requires 'a :> ITransformer and reference type)> =
  { EstimatorChain: EstimatorChain<'a>
    Context: MLContext }
val create : mlContext:MLContext -> EstimatorModel<'a> (requires reference type and 'a :> ITransformer)
val appendBy : transforming:(MLContext -> #IEstimator<'c>) -> estimatorModel:EstimatorModel<'d> -> EstimatorModel<'c> (requires 'c :> ITransformer and reference type and 'd :> ITransformer and reference type)
val mlc : MLContext
property MLContext.Transforms: TransformsCatalog with get
(extension) TransformsCatalog.Concatenate(outputColumnName: Microsoft.FSharp.Core.string, [<ParamArray>] inputColumnNames: Microsoft.FSharp.Core.string Microsoft.FSharp.Core.[]) : Transforms.ColumnConcatenatingEstimator
module DefaultColumnNames

from FSharpML
val Features : Microsoft.FSharp.Core.string
property MLContext.Clustering: ClusteringCatalog with get
property ClusteringCatalog.Trainers: ClusteringCatalog.ClusteringTrainers with get
(extension) ClusteringCatalog.ClusteringTrainers.KMeans(options: Trainers.KMeansTrainer.Options) : Trainers.KMeansTrainer
(extension) ClusteringCatalog.ClusteringTrainers.KMeans(?featureColumnName: Microsoft.FSharp.Core.string,?exampleWeightColumnName: Microsoft.FSharp.Core.string,?numberOfClusters: Microsoft.FSharp.Core.int) : Trainers.KMeansTrainer
val fit : data:IDataView -> estimatorModel:EstimatorModel<'a> -> TransformerModel<'a> (requires 'a :> ITransformer and reference type)
val predictions : '_arg3
Multiple items
module TransformerModel

from FSharpML.TransformerModel

--------------------
namespace FSharpML.TransformerModel

--------------------
type TransformerModel<'a (requires 'a :> ITransformer and reference type)> =
  { TransformerChain: TransformerChain<'a>
    Context: MLContext }
val transform : data:IDataView -> transformerModel:TransformerModel<'b> -> IDataView (requires 'b :> ITransformer and reference type)
val metrics : '_arg3
module Evaluation

from FSharpML.TransformerModel
Multiple items
module Clustering

from FSharpML.TransformerModel.Evaluation

--------------------
type Clustering =
  static member evaluateWith : ?Label:string * ?Score:string * ?Features:string -> (IDataView -> TransformerModel<'a0> -> ClusteringMetrics) (requires 'a0 :> ITransformer and reference type)
static member Evaluation.Clustering.evaluateWith : ?Label:Microsoft.FSharp.Core.string * ?Score:Microsoft.FSharp.Core.string * ?Features:Microsoft.FSharp.Core.string -> (IDataView -> TransformerModel<'a0> -> ClusteringMetrics) (requires 'a0 :> ITransformer and reference type)
val Score : Microsoft.FSharp.Core.string
val testingData : 'a Microsoft.FSharp.Core.[]
Fork me on GitHub