/* 

  Parse a (Weka) ARFF file in Picat.

  The ARFF format is used in the Weka Machine Learning/Data Mining
  system. See more about ARFF here:
  https://www.cs.waikato.ac.nz/ml/weka/arff.html

  Usage:
   $ picat parse_arff.pi file.arff class_index pct_split no_shuffle

  - file.arff: An ARFF file
  - class_index: The position of the class/output. 
    -1 means the last attribute (which is quite common in ARFF files)
  - pct_split: The percentage split of Train and Test datasets.
    Set 1 for no split, i.e. just the training set.
  - no_shuffle: The dataset is normally shuffled before writing to files.
    Set 1 if the dataset should not be shuffled.

  Notes:
  - the files generated are
      file.arff_train.data
      file.arff_test.data
    Both files include the header [num_instances,input_size,output_size].

  - This program doesn't handle missing values. One reason is I don't know
    how to handle this in the FANN data files.

  For some ARFF data sets, see my Weka page: http://www.hakank.org/weka/

  This Picat model was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/

import util.
import nn_hakank.

% main/0: zero-argument entry point. It simply delegates to go/0,
% which prints the command-line syntax.
main => go.

% main/1: entry point taking the command-line arguments as the list ARGS.
%
%   ARGS[1]  the ARFF file name (required; without it the syntax line is
%            printed and the program halts)
%   ARGS[2]  class index, converted with to_int      (default -1)
%   ARGS[3]  percentage split, converted with to_float (default 0.8);
%            echoed as pct=Value when supplied
%   ARGS[4]  no-shuffle flag: only the integer 1 turns it on (default 0)
%
% The resolved settings are printed and then handed to
% parse_and_split_file/4.
main(ARGS) =>
   println(ARGS),
   NumArgs = ARGS.len,

   % Guard: no file name given. halt ends the program, so nothing below
   % runs in that case.
   if NumArgs < 1 then
     println("Syntax: parse_arff.pi file.arff class_index pct_split no_shuffle"),
     halt
   end,

   File = ARGS[1],

   % Each optional argument is bound exactly once: either to the
   % converted command-line value or to its default.
   if NumArgs > 1 then
     ClassIx = to_int(ARGS[2])
   else
     ClassIx = -1
   end,

   if NumArgs > 2 then
     Pct = to_float(ARGS[3]),
     println(pct=Pct)
   else
     Pct = 0.8
   end,

   % Any value other than 1 (or a missing argument) leaves shuffling on.
   if (NumArgs > 3, to_int(ARGS[4]) == 1) then
     NoShuffle = 1
   else
     NoShuffle = 0
   end,

   println([file=File,class_index=ClassIx,pct=Pct,no_shuffle=NoShuffle]),
   parse_and_split_file(File,ClassIx,Pct,NoShuffle).

% go/0: print the command-line syntax line followed by an empty line.
go =>
     print("Syntax: parse_arff.pi file.arff class_index pct_split no_shuffle"),
     nl,
     nl.