Javaでデータマイニングを実現する独熱符号化(One-Hot)

2163 ワード

// list: rows of comma-separated values, index: zero-based column to one-hot encode
    /**
     * One-hot encodes one column of comma-separated rows, in place.
     *
     * <p>Every element of {@code list} is split on {@code ","}. The distinct values found in
     * column {@code index} are numbered in order of first appearance; each row then has a
     * comma and the string returned by {@code array2string} for its 0/1 vector appended to it.
     * Before the rows are rewritten, a label is emitted through {@code pln} and the number of
     * distinct values is printed to standard output.
     *
     * @param list  rows to encode; each element is replaced by the extended row
     * @param index zero-based column whose values are encoded
     * @return the same {@code list} instance that was passed in
     * @throws ArrayIndexOutOfBoundsException if some row has no column {@code index}
     * @throws Exception kept in the signature so existing callers keep compiling unchanged
     */
    public static ArrayList<String> oneHot(ArrayList<String> list, int index) throws Exception {
        // Pass 1: pull the category out of every row exactly once and give each
        // distinct category a slot. Slots are handed out in order of first
        // appearance so the layout of the encoding does not depend on hash order.
        HashMap<String, Integer> toIndex = new HashMap<>();
        String[] categories = new String[list.size()];
        for (int i = 0; i < categories.length; i++) {
            // Limit -1 keeps trailing empty fields, so "a,b," still has a column 2.
            String[] fields = list.get(i).split(",", -1);
            if (index < 0 || index >= fields.length) {
                throw new ArrayIndexOutOfBoundsException(
                        "row " + i + " has " + fields.length + " column(s); column " + index
                                + " was requested");
            }
            String category = fields[index];
            categories[i] = category;
            if (!toIndex.containsKey(category)) {
                toIndex.put(category, toIndex.size());
            }
        }
        pln(" :");
        System.out.println(toIndex.size());

        // Pass 2: append the one-hot vector of each row's category to that row.
        int width = toIndex.size();
        for (int i = 0; i < categories.length; i++) {
            int[] encoded = new int[width];
            encoded[toIndex.get(categories[i])] = 1;
            list.set(i, list.get(i) + "," + array2string(encoded));
        }

        return list;
    }