microsoft
diff --git a/‎ddlearn/DG_aug.py
Lines changed: 4 additions & 1 deletion b/‎ddlearn/DG_aug.py
Lines changed: 4 additions & 1 deletion
diff --git a/‎ddlearn/data_preprocess/deal_dsads.py
Lines changed: 0 additions & 1 deletion b/‎ddlearn/data_preprocess/deal_dsads.py
Lines changed: 0 additions & 1 deletion
diff --git a/‎ddlearn/data_preprocess/deal_pamap.py
Lines changed: 94 additions & 0 deletions b/‎ddlearn/data_preprocess/deal_pamap.py
Lines changed: 94 additions & 0 deletions
diff --git a/‎ddlearn/data_preprocess/deal_uschad.py
Lines changed: 68 additions & 0 deletions b/‎ddlearn/data_preprocess/deal_uschad.py
Lines changed: 68 additions & 0 deletions
diff --git a/‎ddlearn/data_util/data_preprocess_devide_domain.py
Lines changed: 97 additions & 6 deletions b/‎ddlearn/data_util/data_preprocess_devide_domain.py
Lines changed: 97 additions & 6 deletions
diff --git a/‎ddlearn/data_util/raw_aug_loader.py
Lines changed: 13 additions & 4 deletions b/‎ddlearn/data_util/raw_aug_loader.py
Lines changed: 13 additions & 4 deletions
diff --git a/‎ddlearn/main.py
Lines changed: 2 additions & 2 deletions b/‎ddlearn/main.py
Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,10 @@ def __init__(self, n_feature, n_act_class, n_aug_class, dataset, dp):
         self.n_aug_class = n_aug_class
         self.dataset = dataset
         self.dp = dp
-        self.feature_module = net.Network(n_feature, dataset)
+        if dataset == 'uschad':
+            self.feature_module = net.Network_usc(n_feature, dataset)
+        else:
+            self.feature_module = net.Network(n_feature, dataset)
         self.act_cls = nn.Linear(n_feature, n_act_class)
         self.aug_cls = nn.Linear(n_feature, n_aug_class)
         self.criterion = nn.CrossEntropyLoss()
 
@@ -31,7 +31,6 @@ def load_data(root_path, winsize, overlapsize):
             #  merge p/ s01-s60 as data_sub
             data_sub = np.zeros((1, 45))
             for j in range(len(subname_list)):
-                # subfile = subname_list[j]
                 data_i = []
                 if j < 9:
                     name = '0' + str(j+1)
 
@@ -0,0 +1,94 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+import numpy as np
+import os
+from math import isnan
+
+
+def load_data(root_path, winsize, overlap):
+    """Load every data file under ``root_path`` and cut it into windows.
+
+    Each file is read with ``np.loadtxt``. Columns 4:13, 21:24, 27:33, 38:41
+    and 44:50 are kept as sensor channels, column 1 is used as the activity
+    label, and the subject id is parsed from the file name.
+
+    Args:
+        root_path: Directory containing the per-subject data files.
+        winsize: Number of samples per window.
+        overlap: Fraction of ``winsize`` shared by consecutive windows.
+
+    Returns:
+        Tuple ``(x_all, y_all, s_all)`` with the stacked windows, labels and
+        subject ids of all files. Three empty lists are returned when no
+        file produced a window.
+    """
+    x_parts, y_parts, s_parts = [], [], []
+    # os.listdir returns names in arbitrary order; sort so the sample order
+    # (and anything seeded on top of it) is reproducible across machines.
+    for filename in sorted(os.listdir(root_path)):
+        data_i = np.loadtxt(os.path.join(root_path, filename))
+        # Subject id = text after the first '0' and before the first '.'
+        # (presumably names like 'subject101.dat' -> 1; confirm).
+        subject = int(filename.split('0')[1].split('.')[0])
+        x_i = np.hstack((data_i[:, 4:13], data_i[:, 21:24],
+                         data_i[:, 27:33], data_i[:, 38:41], data_i[:, 44:50]))
+        y_i = data_i[:, 1]
+        tx, ty, ts = getwin_replace(x_i, y_i, subject,
+                                    winsize=winsize, overlap=overlap)
+        # A file without any usable window has nothing to stack.
+        if len(tx) == 0:
+            continue
+        x_parts.append(tx)
+        y_parts.append(ty)
+        s_parts.append(ts)
+    if not x_parts:
+        return [], [], []
+    # Stack once at the end instead of re-copying the result per file.
+    return np.vstack(x_parts), np.vstack(y_parts), np.vstack(s_parts)
+
+
+def getwin_replace(x, y, s, winsize, overlap):
+    """Cut ``x`` into fixed-length, single-label windows and fill their NaNs.
+
+    A window is kept only when all of its labels in ``y`` are identical; it
+    is then passed through ``replace_nan`` and the cursor advances by the
+    step size. When a window mixes labels, the cursor instead jumps to the
+    first sample of the last label run inside that window.
+
+    Args:
+        x: 2-D array, samples in rows and channels in columns.
+        y: Per-sample labels, same length as ``x``.
+        s: Subject id attached to every produced window.
+        winsize: Window length in samples.
+        overlap: Fraction of ``winsize`` shared by consecutive windows.
+
+    Returns:
+        Tuple ``(windows, labels, subjects)``; labels and subjects are column
+        vectors with one row per window.
+
+    Raises:
+        ValueError: If the resulting step size is not positive, which would
+            otherwise keep the loop from ever advancing.
+    """
+    data_num = len(x)
+    overlap_size = int(winsize*overlap)
+    stepsize = winsize-overlap_size
+    if stepsize <= 0:
+        raise ValueError(
+            "winsize/overlap give a non-positive step size: {}".format(stepsize))
+    head, tail = 0, winsize
+    xx, yy = [], []
+    while tail <= data_num:
+        ry = np.unique(y[head:tail])
+        if len(ry) == 1:
+            x_win = x[head:tail, :]
+            x_new = replace_nan(x_win)
+            xx.append(x_new)
+            yy.append(y[head])
+            head += stepsize
+            tail += stepsize
+        else:
+            # Walk back from the window's last sample to where its label
+            # run starts, and restart the window there.
+            head = tail-1
+            while y[head] == y[head-1]:
+                head -= 1
+            tail = head + winsize
+    ss = np.ones(len(yy)) * s
+    return np.array(xx), np.array(yy).reshape(-1, 1), np.array(ss).reshape(-1, 1)
+
+
+def replace_nan(x_win):
+    """Return a copy of ``x_win`` with NaNs replaced by their column mean.
+
+    The mean of each column is computed over its non-NaN entries only. The
+    input array is left untouched: the caller may pass a view into a larger
+    recording, and writing the fill values back would leak one window's
+    imputation into the overlapping neighbour.
+
+    Args:
+        x_win: 2-D array, samples in rows and channels in columns.
+
+    Returns:
+        A new array of the same shape as ``x_win``. A column that is
+        entirely NaN stays NaN.
+    """
+    x_new = np.array(x_win, copy=True)
+    nan_mask = np.isnan(x_new)
+    if nan_mask.any():
+        col_mean = np.nanmean(x_new, axis=0)
+        # Broadcast the per-column means to the window shape so the boolean
+        # mask picks the matching mean for every NaN position.
+        x_new[nan_mask] = np.broadcast_to(col_mean, x_new.shape)[nan_mask]
+    return x_new
+
+
+def calculate_mean_value(x):
+    """Return the mean of the entries of ``x`` that are not NaN.
+
+    Args:
+        x: 1-D sequence of numbers.
+
+    Returns:
+        The mean of the non-NaN entries (NaN when there are none).
+    """
+    values = np.asarray(x)
+    kept = values[~np.isnan(values)]
+    return np.mean(kept, axis=0)
+
+
+def get_pamap_npy(root_path, save_path, winsize, overlap):
+    """Build ``pamap_processwin.npz`` under ``save_path`` unless it exists.
+
+    Args:
+        root_path: Directory with the raw data files, passed to ``load_data``.
+        save_path: Output prefix; the archive name is appended to it as-is.
+        winsize: Window length in samples.
+        overlap: Fraction of ``winsize`` shared by consecutive windows.
+    """
+    target = save_path+'pamap_processwin.npz'
+    # An existing archive is kept untouched.
+    if not os.path.exists(target):
+        x, y, s = load_data(root_path, winsize, overlap)
+        np.savez(target, x=x, y=y, s=s)
+
+
+if __name__ == '__main__':
+    # Script entry point: window the raw protocol files and cache the result.
+    get_pamap_npy(
+        root_path='/home/data/process/raw/PAMAP/PAMAP2_Dataset/Protocol/',
+        save_path='/home/data/process/pamap/',
+        winsize=512,
+        overlap=0.5,
+    )
@@ -0,0 +1,68 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+import numpy as np
+import scipy.io
+import os
+
+
+def getwin(x, y, s, winsize, overlapsize):
+    """Cut ``x`` into fixed-length windows with a constant stride.
+
+    A window is kept only when both its labels and its subject ids are
+    constant; any other window is reported with ``print("error!")`` and
+    dropped.
+
+    Args:
+        x: 2-D array, samples in rows and channels in columns.
+        y: Per-sample labels, same length as ``x``.
+        s: Per-sample subject ids, same length as ``x``.
+        winsize: Window length in samples.
+        overlapsize: Number of samples shared by consecutive windows.
+
+    Returns:
+        Tuple ``(windows, labels, subjects)``; labels and subjects are column
+        vectors with one row per kept window.
+
+    Raises:
+        ValueError: If ``overlapsize`` is not smaller than ``winsize``; the
+            stride would be zero or negative and the scan could not advance.
+    """
+    n_samples = len(x)
+    stepsize = winsize-overlapsize
+    if stepsize <= 0:
+        raise ValueError(
+            "overlapsize must be smaller than winsize, got stride {}".format(stepsize))
+    xx, yy, ss = [], [], []
+    for h in range(0, n_samples-winsize+1, stepsize):
+        t = h+winsize
+        if len(np.unique(y[h:t])) == 1 and len(np.unique(s[h:t])) == 1:
+            xx.append(x[h:t, :])
+            yy.append(y[h])
+            ss.append(s[h])
+        else:
+            print("error!")
+    return np.array(xx), np.array(yy).reshape(-1, 1), np.array(ss).reshape(-1, 1)
+
+
+def get_npy(root_path, save_path, winsize, overlapsize):
+    """Build ``uschad_processwin.npz`` under ``save_path`` unless it exists.
+
+    Args:
+        root_path: Raw dataset directory, passed to ``get_raw_data_deal``.
+        save_path: Output prefix; the archive name is appended to it as-is.
+        winsize: Window length in samples.
+        overlapsize: Number of samples shared by consecutive windows.
+    """
+    target = save_path+'uschad_processwin.npz'
+    # An existing archive is kept untouched.
+    if not os.path.exists(target):
+        x, y, s = get_raw_data_deal(root_path, winsize, overlapsize)
+        np.savez(target, x=x, y=y, s=s)
+
+
+def get_raw_data_deal(root_path, winsize, overlapsize):
+    """Window every ``.mat`` recording found in the subject folders.
+
+    Every entry of ``root_path`` except the three known helper files is
+    treated as a subject folder; the subject id is the text after the first
+    't' in the folder name. Each file inside is loaded with
+    ``scipy.io.loadmat`` and windowed with ``getwin``.
+
+    Args:
+        root_path: Directory holding one folder per subject.
+        winsize: Window length in samples.
+        overlapsize: Number of samples shared by consecutive windows.
+
+    Returns:
+        Tuple ``(x_all, y_all, s_all)`` of stacked windows, labels and
+        subject ids; the arrays have zero rows when nothing was windowed.
+    """
+    skip_names = ('Readme.txt', 'displayData_acc.m', 'displayData_gyro.m')
+    # Zero-row seeds fix the result shape/dtype even when no window is found,
+    # without the dummy first row that had to be sliced off afterwards.
+    x_parts = [np.zeros((0, winsize, 6))]
+    y_parts = [np.zeros((0, 1))]
+    s_parts = [np.zeros((0, 1))]
+    # os.listdir order is arbitrary; sort for a reproducible sample order.
+    for subfolder in sorted(os.listdir(root_path)):
+        if subfolder in skip_names:
+            continue
+        sub = subfolder.split('t')[1]
+        path = os.path.join(root_path, subfolder)
+        for mat_name in sorted(os.listdir(path)):
+            data = scipy.io.loadmat(os.path.join(path, mat_name))
+            x = data['sensor_readings']
+            # Some files spell the key 'activity_numbr'.
+            if 'activity_number' in data:
+                act_num = data['activity_number']
+            else:
+                act_num = data['activity_numbr']
+            y = np.ones(x.shape[0]) * int(act_num[0])
+            s = np.ones(x.shape[0]) * int(sub)
+            tx, ty, ts = getwin(x, y, s, winsize, overlapsize)
+            # A recording shorter than one window yields nothing to stack.
+            if len(tx) == 0:
+                continue
+            x_parts.append(tx)
+            y_parts.append(ty)
+            s_parts.append(ts)
+    # Stack once at the end instead of re-copying the result per file.
+    return np.vstack(x_parts), np.vstack(y_parts), np.vstack(s_parts)
+
+
+if __name__ == '__main__':
+    # Window length in samples; consecutive windows share half of it.
+    winsize = 500
+    overlap = 0.5
+    overlapsize = int(winsize*overlap)
+    root_path = '/home/data/usc-had/raw/USC-HAD/'
+    save_path = '/home/data/process/uschad/'
+    # get_npy runs get_raw_data_deal itself when the archive is missing, so
+    # calling it here as well would only process the whole dataset twice.
+    get_npy(root_path, save_path, winsize, overlapsize)
@@ -1,15 +1,106 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
+import pickle
+from sklearn.model_selection import train_test_split
+import numpy as np
 import sys
 import os
+# Keep the project-local imports below this line: `main` lives in the parent
+# directory, which only becomes importable after the append (presumably the
+# same holds for `utils`; confirm). Import sorters must not hoist them.
 sys.path.append(os.path.dirname(sys.path[0]))
 from raw_aug_loader import set_param
 from main import args_parse
 import utils
-import numpy as np
-from sklearn.model_selection import train_test_split
-import pickle
+
+# ============ PAMAP2 ===============
+
+
+def merge_split_pamap(seed, root_path="/home/data/process/pamap/pamap_processwin.npz", n_domain=4, save_file='/home/data/process/pamap/pamap_subject_final.pkl'):
+    """Group the windowed PAMAP archive into domains and split each of them.
+
+    The archive at ``root_path`` is filtered with ``select_sub_act``, labels
+    and subject ids are shifted by -1, and domain ``i`` collects the windows
+    of the shifted subject ids ``2*i`` and ``2*i+1``. The per-domain
+    ``[x, y]`` pairs are handed to ``devide_train_val_test``.
+
+    Args:
+        seed: Forwarded to ``devide_train_val_test``.
+        root_path: Path of the ``.npz`` archive with keys ``x``, ``y``, ``s``.
+        n_domain: Number of domains to build.
+        save_file: Forwarded to ``devide_train_val_test``.
+    """
+    archive = np.load(root_path)
+    windows = archive['x']
+    labels = archive['y'].reshape(-1,)
+    subjects = archive['s'].reshape(-1,)
+    x_new, y_new, s_new = select_sub_act(windows, labels, subjects)
+    y_new = y_new-1
+    s_new = s_new-1
+    data_lst = []
+    for i in range(n_domain):
+        in_domain = (s_new == 2*i) | (s_new == 2*i+1)
+        data_lst.append([x_new[in_domain, :, :], y_new[in_domain]])
+
+    devide_train_val_test(data_lst, n_domain, save_file, seed)
+
+
+def select_sub_act(x, y, s):
+    """Keep the windows of subjects 1-8 and of the eight selected activities.
+
+    The kept activity labels 12, 13, 16 and 17 are renumbered to 5, 6, 7 and
+    8, so the returned labels run from 1 to 8. The inputs are not modified.
+
+    Args:
+        x: Array of windows, one per entry along the first axis.
+        y: 1-D array with the activity label of each window.
+        s: 1-D array with the subject id of each window.
+
+    Returns:
+        Tuple ``(x_new, y_new, s_new)`` with the selected windows, their
+        renumbered labels and their subject ids. When nothing matches, the
+        arrays have zero rows but ``x_new`` keeps the trailing axes of ``x``.
+    """
+    sub_list = [1, 2, 3, 4, 5, 6, 7, 8]
+    act_list = [1, 2, 3, 4, 12, 13, 16, 17]
+    x, y, s = np.asarray(x), np.asarray(y), np.asarray(s)
+    keep = np.isin(s, sub_list) & np.isin(y, act_list)
+    # Boolean indexing copies, so the renumbering below cannot touch ``y``.
+    x_new, y_new, s_new = x[keep], y[keep], s[keep]
+    for old_label, new_label in ((12, 5), (13, 6), (16, 7), (17, 8)):
+        y_new[y_new == old_label] = new_label
+    return x_new, y_new, s_new
+
+
+# ============ USC-HAD ===============
+def merge_split_uschad(seed, root_path='/home/data/process/uschad/uschad_processwin.npz', n_domain=5, save_file='/home/data/process/uschad/uschad_subject_final.pkl'):
+    """Group the windowed USC-HAD archive into five domains and split them.
+
+    Labels are shifted by -1. Each domain collects the windows of a fixed
+    set of subject ids, and the per-domain ``[x, y]`` pairs are handed to
+    ``devide_train_val_test``.
+
+    Args:
+        seed: Forwarded to ``devide_train_val_test``.
+        root_path: Path of the ``.npz`` archive with keys ``x``, ``y``, ``s``.
+        n_domain: Forwarded to ``devide_train_val_test``; the grouping below
+            always builds five domains.
+        save_file: Forwarded to ``devide_train_val_test``.
+    """
+    archive = np.load(root_path)
+    x = archive['x']
+    y = (archive['y']-1).reshape(-1,)
+    s = archive['s'].reshape(-1,)
+    # Subject ids that make up each domain, in domain order.
+    subject_groups = (
+        [1, 3, 10],
+        [2, 5, 13],
+        [4, 7, 9],
+        [6, 8, 14],
+        [11, 12],
+    )
+    data_lst = []
+    for group in subject_groups:
+        in_domain = np.isin(s, group)
+        data_lst.append([x[in_domain], y[in_domain]])
+
+    devide_train_val_test(data_lst, n_domain, save_file, seed)
+
+# =================DSADS=====================
 
 
 def merge_split_dsads(seed, root_path='/home/data/process/dsads/dsads_processwin.npz', n_domain=4, save_file='/home/data/process/dsads/dsads_subject_final.pkl'):
 
@@ -128,9 +128,17 @@ def reshape_data(x, dataset, when):
     if when == 'begin':
         if dataset == 'dsads':
             x = x.reshape(-1, 45)
+        elif dataset == 'uschad':
+            x = x.reshape(-1, 6)
+        elif dataset == 'pamap':
+            x = x.reshape(-1, 27)
     elif when == 'end':
         if dataset == 'dsads':
             x = x.reshape(-1, 125, 45)
+        elif dataset == 'uschad':
+            x = x.reshape(-1, 500, 6)
+        elif dataset == 'pamap':
+            x = x.reshape(-1, 512, 27)
     else:
         print("error")
     return x
@@ -171,15 +179,17 @@ def pick_data(data, data_type, data_name, src):
 def set_param(dataset):
+    """Return the number of domains used for ``dataset``.
+
+    Args:
+        dataset: One of ``'dsads'``, ``'pamap'`` or ``'uschad'``.
+
+    Returns:
+        4 for ``'dsads'`` and ``'pamap'``, 5 for ``'uschad'``.
+
+    Raises:
+        ValueError: If ``dataset`` is none of the names above; without this
+            branch the function would fail on the unbound ``n_domain``.
+    """
     if dataset == 'dsads':
         n_domain = 4
-    else:
-        print("no matching dataset")
+    elif dataset == 'pamap':
+        n_domain = 4
+    elif dataset == 'uschad':
+        n_domain = 5
+    else:
+        raise ValueError("no matching dataset: {!r}".format(dataset))
     return n_domain
 
 
 if __name__ == "__main__":
     args = args_parse()
     root_path = "/home/data/process/"
-    for args.dataset in ['dsads']:
+    for args.dataset in ['dsads','pamap','uschad']:
         n_domain = set_param(args.dataset)
         for args.scaler_method in ['minmax']:
             for remain_data_rate in [0.2, 0.4, 0.6, 0.8, 1.0]:
@@ -207,4 +217,3 @@ def set_param(dataset):
                         }
                         with open(save_path, 'wb') as f:
                             pickle.dump(raw_and_aug, f)
-    print("successful")
 
@@ -41,9 +41,9 @@ def args_parse():
     parser.add_argument('--root_path', type=str,
                         default="/home/ddlearn/data/")
     parser.add_argument('--data_save_path', type=str,
-                        default='/home/ddlearn/data/')
+                        default="/home/ddlearn/data/")
     parser.add_argument('--save_path', type=str,
-                        default="/home/ddlearn/results/")
+                        default="/home/results/")
 
     args = parser.parse_args()
     args.step_per_epoch = 100000000000