Let's read the following data into a Pandas DataFrame with the appropriate labels:
train_path = data_path/'train'
test_path = data_path/'test'


def read_data(dir_path):
    """Read the reviews under ``dir_path`` into a labelled pandas DataFrame.

    ``dir_path`` must support the ``/`` operator (e.g. a ``pathlib.Path``)
    and contain two sub-directories, ``pos`` and ``neg``. Every entry in
    each sub-directory is read as UTF-8 text.

    Returns a DataFrame with the columns:
      * ``index`` - the row's position within its own (pos or neg) frame,
        preserved by ``reset_index`` after concatenation;
      * ``text``  - the file contents;
      * ``label`` - 1 for entries from ``pos``, 0 for entries from ``neg``.
    Rows from ``pos`` come first, followed by rows from ``neg``.
    """
    def load_dir_reviews(reviews_path):
        """Return a single-column ('text') DataFrame of every file's contents."""
        # Path.read_text opens *and closes* each file, so no handle is leaked
        # (the previous bare open() never closed the files it read).
        reviews = [
            filename.read_text(encoding='utf-8')
            for filename in reviews_path.iterdir()
        ]
        return pd.DataFrame({'text': reviews})

    pos_reviews = load_dir_reviews(dir_path/'pos')
    neg_reviews = load_dir_reviews(dir_path/'neg')
    pos_reviews['label'] = 1
    neg_reviews['label'] = 0

    # reset_index() without drop=True keeps the old per-frame index as an
    # 'index' column, exactly as the in-place call did before.
    merged = pd.concat([pos_reviews, neg_reviews]).reset_index()
    return merged
This function reads the ...