// devcontainer.json
{
    "name": "FSharpCodeContainer",
    "dockerFile": "../Dockerfile", // Path to the Dockerfile that defines the environment
    "appPort": [8080],
    "extensions": [ 
        "ionide.ionide-fsharp", // F# extension
        "ms-vscode.csharp",
        "editorconfig.editorconfig",
        "ms-dotnettools.dotnet-interactive-vscode", // allows creating Jupyter notebooks with a .NET kernel
        "cweijan.vscode-office" // can display .xlsx files in VS Code
    ],
}

#r "nuget: Deedle, 2.3.0"
#r "nuget: Plotly.NET, 2.0.0-preview.16"
#r "nuget: Plotly.NET.Interactive, 2.0.0-preview.16"
#r "nuget: FSharp.Stats, 0.4.1"
#r "nuget: FSharpAux"
#r "nuget: BIO-BTE-06-L-7_Aux, 0.0.9"
#r "nuget: ISADotNet, 0.4.0-preview.4"
#r "nuget: ISADotNet.XLSX, 0.4.0-preview.4"
#r "nuget: ISADotNet.IO, 0.0.2"

open Deedle
open Plotly.NET
open Plotly.NET.LayoutObjects
open FSharp.Stats
open System.IO
open FSharpAux
open BIO_BTE_06_L_7_Aux.Deedle_Aux

/// Relative path of the tab-separated ratio table that is loaded into `data`
/// via `readQuantAndProtFrame`.
[<Literal>]
let dataPath = @"../../assays/VP21_WC/dataset/WCAnnotated_Ratio.txt"

// Quality control: check the dilution series. In this example the mean should always increase by a factor of 5.
/// Tag stored in the `QProt` field of a `PeptideIon` row key.
/// `readQuantAndProtFrame` derives it from the name of the protein group.
type Qprot =
    /// Assigned when the protein group contains "QProt_newCBB" but not "QProt_newPS".
    | CBB
    /// Assigned when the protein group contains "QProt_newPS" but not "QProt_newCBB".
    | PS

/// Row key used by `readQuantAndProtFrame` to index the frame it returns.
/// Apart from `QProt`, every field is read from the input column of the same name.
type PeptideIon = 
    {
        /// ';'-separated protein group; the members are sorted when the key is built.
        ProteinGroup    : string  
        /// Value of the "Synonym" column.
        Synonym         : string
        /// Value of the "StringSequence" column.
        StringSequence  : string
        /// Value of the "PepSequenceID" column.
        PepSequenceID   : int
        /// Value of the "Charge" column.
        Charge          : int
        /// Derived from `ProteinGroup` (see `Qprot`); not read from a column.
        QProt           : Qprot
    }

/// Names of the columns whose values are folded into the `PeptideIon` row key.
/// `readQuantAndProtFrame` drops these columns from the frame after indexing.
let rowKeyColNames = [|"ProteinGroup"; "StringSequence"; "PepSequenceID"; "Charge"; "Synonym"|]

/// Reads the tab-separated table at `path` (the first line is treated as the header),
/// indexes its rows with a `PeptideIon` key built from the columns listed in
/// `rowKeyColNames`, and then drops those columns from the frame.
///
/// The "ProteinGroup" value is normalized before it enters the key: it is split on ';',
/// the parts are sorted and joined with ';' again, so the key does not depend on the
/// order in which the group members are listed.
///
/// Fails (via `failwith`) when a protein group contains both or neither of the markers
/// "QProt_newCBB" and "QProt_newPS".
let readQuantAndProtFrame (path: string) =
    let frame =
        Frame.ReadCsv (path,true,separators="\t")
        |> Frame.indexRowsUsing (fun s ->
            let pGroup = 
                s.GetAs<string>("ProteinGroup")
                |> String.split ';'
                |> Array.sort
                |> String.concat ";"
            {
                ProteinGroup = pGroup
                StringSequence = s.GetAs<string>("StringSequence")
                PepSequenceID = s.GetAs<int>("PepSequenceID")
                Charge = s.GetAs<int>("Charge")
                Synonym = s.GetAs<string>("Synonym")
                QProt = 
                    // Exactly one of the two markers must be present in the group name.
                    match pGroup |> String.contains "QProt_newCBB", pGroup |> String.contains "QProt_newPS" with
                    | true, false  -> CBB
                    | false, true  -> PS  
                    | anythingElse -> failwith $"Error! unexpected input in cbb ps matching: {anythingElse}" 
            }
        )
    // DropColumn mutates the frame in place, so the key columns can be removed one after
    // another without threading the frame through each step.
    rowKeyColNames
    |> Array.iter (fun name -> frame.DropColumn(name))
    frame

// Load the ratio table; rows are indexed by `PeptideIon` keys.
let data = readQuantAndProtFrame dataPath

// Preview the first 10 rows as a table.
// NOTE(review): the meaning of the 500 passed to formatAsTable is not visible here — confirm.
data 
|> Frame.take 10
|> formatAsTable 500

/// Builds one vertical box plot per numeric column of the frame `f` and combines them
/// into a single chart. The y axis is titled "Ratio" with its range set to 0..100, and
/// the legend is switched off.
let createBoxPlot f =
    let boxPerColumn =
        Frame.getNumericCols f
        |> Series.map (fun colKey column ->
            // Pair every value with the column key (as a string) so that all points
            // of one column share the same x label.
            let labels, ratios =
                Series.values column
                |> Seq.map (fun v -> string colKey, v)
                |> Seq.unzip
            Chart.BoxPlot(labels, ratios, Orientation = StyleParam.Orientation.Vertical)
        )
        |> Series.values
    boxPerColumn
    |> Chart.combine
    |> Chart.withYAxisStyle("Ratio", MinMax=(0, 100))
    |> Chart.withLegend(false)

// Box plots of the numeric columns of `data`.
createBoxPlot data

/// Returns `f` unchanged when it already has a file extension; otherwise returns `f`
/// with the extension "wiff" added.
let normalizeFileName (f: string) =
    match Path.HasExtension f with
    | true -> f
    | false -> Path.ChangeExtension(f, "wiff")

open ISADotNet
open arcIO.NET

/// Relative path of the ISA assay workbook that is read below.
let assayFilePath = @"../../assays/VP21_WC/isa.assay.xlsx"

/// Reads the assay workbook; only the fourth element of the returned tuple is kept.
/// Tip: put this into a separate code block, run it once and then continue working in a new block.
/// This avoids an error when the .xlsx file is open somewhere else.
let _,_,_,myAssayFile = XLSX.AssayFile.Assay.fromFile assayFilePath

/// Lookup built from the assay by `ISADotNet.createInOutMap`.
/// NOTE(review): presumably maps process inputs to outputs — confirm against the helper's definition.
let inOutMap = ISADotNet.createInOutMap myAssayFile

/// `data` with its columns averaged per dilution step: columns that share the same
/// cbc and ps q-protein quantity (i.e. the biological replicates) are replaced by their mean.
let cbc_ps_data =
    // Key that represents the dilution series: the cbc label, the ps label and the original key.
    let toDilutionKey rk =
        sprintf "cbc: %A" (getCBC_qconcat rk), sprintf "ps: %A" (getPS_qconcat rk), rk
    // Turn the column keys into row keys and relabel them.
    let samplesAsRows =
        data
        |> Frame.transpose
        |> Frame.mapRowKeys toDilutionKey
    // Mean over all rows that carry the same (cbc, ps) pair.
    let replicateMeans =
        samplesAsRows
        |> Frame.applyLevel (fun (cbc,ps,_) -> cbc, ps) Stats.mean
    // Swap the row keys back to column keys.
    replicateMeans
    |> Frame.transpose

// Show the averaged frame as a table.
// NOTE(review): the meaning of the 500 passed to formatAsTable is not visible here — confirm.
cbc_ps_data
|> formatAsTable 500

VS Code Remote-Container and the ARC

Posted on 2022-4-25 by Kevin Frey in Advanced

VS Code Remote-Container and the ARC¶

Requirements¶

Set up¶

Folder structure¶

How to start¶

⚠️ .devcontainer MUST be on root level¶

How to open/access not displayed files¶

Dependencies¶

Review Assay file¶

Data analysis¶

Data access¶

Visualisation 1¶

Visualisation 2¶

Metadata access¶

Data transformation¶

Verdict¶

Outlook¶