Importing CSV term sets into SharePoint 2010 using F#

20 May 2012

Source code available through Github.
See also: Importing CSV term sets into SharePoint 2010 using PowerShell.

Here I provide an F# implementation of a PowerShell script for populating the SharePoint 2010 term store with a term set. Each term set is stored in a CSV file (whose format is defined by Microsoft), and the contents of that file drive the calls made to the SharePoint .NET APIs to import it:

|   |       0       |       5      |       6      | ... |      11      |
|---|---------------|--------------|--------------|-----|--------------|
| 0 | Term Set Name | Level 1 Term | Level 2 Term | ... | Level 7 Term |
| 1 | TermSetName   |              |              | ... |              |
| 2 |               | Term1        |              | ... |              |
| 3 |               | Term2        | Term2.2      | ... |              |

First the function signatures:

// Program.fsi
module Bugfree.SharePoint.TermSetImporter

open Microsoft.SharePoint.Taxonomy

/// Lazily reads the file at the given path and yields, for each line,
/// the array of its comma-separated fields.
val importCsv : string -> seq<string[]>
/// Deletes every group in the store whose name equals the given string,
/// removing the group's term sets first and committing the change.
val removeTermGroup : TermStore -> string -> unit
/// Returns the first element of the sequence that satisfies the predicate;
/// if there is none, returns the result of applying the creation function
/// to the first argument.
val getOrCreate : 'b -> seq<'a> -> ('a -> bool) -> ('b -> 'a) -> 'a
/// Returns the group with the given name from the store, creating it if absent.
val getOrCreateGroup : string -> TermStore -> Group
/// Returns the term set with the given name from the group, creating it if absent.
val getOrCreateSet : string -> Group -> TermSet
/// Returns the term with the given name directly below the item, creating it if absent.
val getOrCreateTerm : string -> TermSetItem -> Term
/// Walks the list of term names starting at the given item, getting or
/// creating each name as a child of the previous one. Returns the item
/// reached last, or the starting item when the list is empty.
val importTerm : TermSetItem -> string list -> TermSetItem
/// Imports parsed CSV rows into the named group of the store: the term set
/// name is read from the first field of the second row, and every later row
/// contributes a chain of terms taken from its fields at index 5 onwards.
val importTermSet : TermStore -> string -> seq<string[]> -> unit 
/// Program entry point; the implementation returns 0.
val main : string[] -> int

Then the implementation:

// Program.fs
module Bugfree.SharePoint.TermSetImporter

open System.IO
open System.Collections.Generic
open Microsoft.SharePoint
open Microsoft.SharePoint.Taxonomy

/// Lazily reads the CSV file at `path` and yields one array of fields per line.
///
/// Fields are separated by commas, but a comma inside a double-quoted section
/// does not end the field, so a value such as "Sales, EMEA" stays in a single
/// column instead of shifting every following column to the right.
/// The quote characters themselves are kept in the returned fields, exactly as
/// a plain split would keep them; callers are expected to strip them.
///
/// The file is opened when enumeration starts and closed when it ends, and it
/// is re-read on every enumeration of the returned sequence.
/// Quoted fields that span several physical lines are not supported.
let importCsv path =
    // Splits one line on the commas that lie outside double-quoted sections.
    let splitLine (line : string) =
        let fields = ResizeArray<string>()
        let current = System.Text.StringBuilder()
        let mutable inQuotes = false
        for c in line do
            if c = '"' then
                // A doubled quote ("") toggles twice and so leaves the state unchanged.
                inQuotes <- not inQuotes
                current.Append(c) |> ignore
            elif c = ',' && not inQuotes then
                fields.Add(current.ToString())
                // Reset via Length rather than Clear(), which needs .NET 4.
                current.Length <- 0
            else
                current.Append(c) |> ignore
        fields.Add(current.ToString())
        fields.ToArray()

    seq { use sr = File.OpenText(path)
          while not sr.EndOfStream do
            yield splitLine (sr.ReadLine()) }

/// Removes every group named `name` from `store`.
///
/// For each matching group, all of its term sets are deleted first, then the
/// group itself, and the change is committed with `store.CommitAll()`.
/// Nothing happens when no group has that name.
let removeTermGroup (store : TermStore) name =
    // Snapshot the matches before deleting anything: deleting a group while
    // lazily enumerating store.Groups would modify the collection being walked.
    let matching =
        store.Groups
        |> Seq.filter (fun g -> g.Name = name)
        |> Seq.toArray
    for group in matching do
        // Same reasoning for the term sets of the group being emptied.
        for termSet in Seq.toArray group.TermSets do
            termSet.Delete()
        group.Delete()
        store.CommitAll()

/// Looks up the first element of `children` for which `predicate` holds.
/// When such an element exists it is returned as is; otherwise the result of
/// `create name` is returned. `create` is only invoked in the latter case.
let getOrCreate name children predicate create =
    children
    |> Seq.tryFind predicate
    |> function
       | Some existing -> existing
       | None -> create name

/// Returns the group of `store` whose name equals `name`, calling
/// `store.CreateGroup` with that name when no such group exists.
let getOrCreateGroup name (store : TermStore) =
    let isNamed (candidate : Group) = candidate.Name = name
    let createGroup (groupName : string) = store.CreateGroup(groupName)
    getOrCreate name store.Groups isNamed createGroup

/// Returns the term set of `group` whose name equals `name`, calling
/// `group.CreateTermSet` with that name when no such term set exists.
let getOrCreateSet name (group : Group) =
    let isNamed (candidate : TermSet) = candidate.Name = name
    let createSet (setName : string) = group.CreateTermSet(setName)
    getOrCreate name group.TermSets isNamed createSet

/// Returns the term directly below `item` that is called `name`, creating it
/// with `item.CreateTerm(name, 1033)` when it does not exist yet.
///
/// The lookup compares against the normalized form of `name`. SharePoint
/// stores term names normalized (surrounding whitespace trimmed, '&' replaced
/// by a wide ampersand), so comparing the raw CSV text would miss an existing
/// term such as "R & D" and then attempt to create a duplicate.
let getOrCreateTerm name (item : TermSetItem) =
    let normalized = TaxonomyItem.NormalizeName(name)
    getOrCreate name item.Terms (fun t -> t.Name = normalized)
                                // 1033 is the locale identifier passed for the new term's label.
                                (fun name -> item.CreateTerm(name, 1033))

/// Descends from `parent` along `levels`, one term name per level: each name
/// is fetched or created (via `getOrCreateTerm`) below the item reached so far.
/// Returns the item reached after the last name, or `parent` itself when
/// `levels` is empty.
let importTerm (parent : TermSetItem) levels =
    let descend (current : TermSetItem) level =
        getOrCreateTerm level current :> TermSetItem
    List.fold descend parent levels
                               
/// Imports the term set described by `rows` into the group `groupName` of `store`.
///
/// Expected layout of `rows`:
///   * row 0 - header, ignored;
///   * row 1 - its first field is the term set name;
///   * rows 2.. - fields from index 5 onwards hold the term names for each
///     level; empty levels are dropped and the remaining names form a chain
///     of nested terms.
/// Double quotes are stripped from every name that is used.
/// Rows with fewer than six fields (for example a trailing blank line) carry
/// no terms and are skipped. All changes are committed once at the end.
///
/// Raises ArgumentException when `rows` has no row naming the term set.
let importTermSet (store : TermStore) groupName (rows : seq<string[]>) =
    // Enumerate the input exactly once: a lazy sequence backed by a file
    // would otherwise be re-read for the name row and again for the terms.
    let allRows = Seq.toArray rows
    if allRows.Length < 2 || allRows.[1].Length = 0 then
        invalidArg "rows" "Expected a header row followed by a row naming the term set."

    let unquote (s : string) = s.Replace("\"", "")
    let termSetName = unquote allRows.[1].[0]
    let termSet = getOrCreateGroup groupName store |> getOrCreateSet termSetName

    for row in Seq.skip 2 allRows do
        if row.Length > 5 then
            row.[5..]
            // Strip quotes before filtering so a quoted empty field ("")
            // is dropped instead of becoming an empty term name.
            |> Array.map unquote
            |> Array.filter (fun level -> level <> "")
            |> Array.toList
            |> importTerm termSet
            |> ignore
    store.CommitAll()

/// Program entry point: connects to the site collection at http://sp2010,
/// opens the "Managed Metadata Service" term store, removes any existing
/// "MyGroup" group and re-imports it from C:\Test.csv.
/// The command-line arguments are not used. Returns 0.
[<EntryPoint>]
let main args =
    // SPSite is disposable; `use` releases it when main returns,
    // including when the import fails with an exception.
    use siteCollection = new SPSite("http://sp2010")
    let session = new TaxonomySession(siteCollection)
    let store = session.TermStores.["Managed Metadata Service"]
    // Verbatim string: same path as before, without depending on "\T"
    // not being an escape sequence.
    let rows = importCsv @"C:\Test.csv"
    let groupName = "MyGroup"
    removeTermGroup store groupName
    importTermSet store groupName rows
    0

Maybe it's because I have now implemented the population algorithm twice, but to me the F# implementation seems the easier one to write and follow. Not because PowerShell is a bad language, but because F#'s static typing and type inference make it better suited to implementing something of this relative complexity.