Could someone please help me understand the key logic behind data loaders, and thereafter build some intuition for how they work?
DataLoader is a class which you can extend for your own purpose. Can you ask more specifically what you want to know?
Unfortunately, I am unable to understand the purpose of the class and how it is executed.
For example, consider the following:
class TransliterationDataLoader(Dataset):
    """Paired English/Hindi word dataset with a hand-rolled shuffled batcher.

    The class extends ``Dataset`` and therefore exposes ``__len__`` and
    ``__getitem__`` for index-based access to ``(english, hindi)`` word pairs.
    On top of that it keeps its own shuffled index list and a cursor so that
    ``get_batch`` can serve consecutive mini-batches, reshuffling once the
    cursor reaches the end of the data (one epoch).

    Attributes are plain lists:
        eng_words / hindi_words: parallel lists; position ``i`` in one
            corresponds to position ``i`` in the other.
        shuffle_indices: a permutation of ``range(len(eng_words))``.
        shuffle_start_index: cursor into ``shuffle_indices`` marking where
            the next batch starts.
    """

    def __init__(self, filename):
        """Parse ``filename`` and prepare a shuffled visiting order."""
        self.eng_words, self.hindi_words = self.readXmlDataset(filename, cleanHindiVocab)
        self.shuffle_indices = list(range(len(self.eng_words)))
        random.shuffle(self.shuffle_indices)
        self.shuffle_start_index = 0

    def __len__(self):
        """Return the number of word pairs."""
        return len(self.eng_words)

    def __getitem__(self, idx):
        """Return the ``(english, hindi)`` pair stored at position ``idx``."""
        return self.eng_words[idx], self.hindi_words[idx]

    def readXmlDataset(self, filename, lang_vocab_cleaner):
        """Read an XML corpus and return two parallel word lists.

        Each child of the XML root is treated as one entry whose first
        sub-element holds the English text and whose second holds the text in
        the other language. The English text is passed through
        ``cleanEnglishVocab`` and the other text through
        ``lang_vocab_cleaner``; both are expected to return a sequence of
        words. Entries whose two word sequences differ in length are reported
        and skipped so the returned lists stay aligned.
        """
        transliterationCorpus = ET.parse(filename).getroot()
        lang1_words = []
        lang2_words = []
        for line in transliterationCorpus:
            wordlist1 = cleanEnglishVocab(line[0].text)
            wordlist2 = lang_vocab_cleaner(line[1].text)
            # Skip noisy data: a length mismatch would break the 1:1 pairing.
            if len(wordlist1) != len(wordlist2):
                print('Skipping: ', line[0].text, ' - ', line[1].text)
                continue
            lang1_words.extend(wordlist1)
            lang2_words.extend(wordlist2)
        return lang1_words, lang2_words

    def get_random_sample(self):
        """Return one uniformly random ``(english, hindi)`` pair."""
        return self.__getitem__(np.random.randint(len(self.eng_words)))

    def get_batch_from_array(self, batch_size, array):
        """Pick ``batch_size`` items of ``array`` following the shuffled order.

        The batch covers ``shuffle_indices[shuffle_start_index : start + batch_size]``.
        When that window runs past the end of the data, the overflow is taken
        from the beginning of ``shuffle_indices`` and placed *before* the
        remaining tail items. The cursor itself is not modified here, so the
        method can be called once per language and yield aligned batches.
        """
        total = len(self.eng_words)
        end = self.shuffle_start_index + batch_size
        wrapped = []
        if end >= total:
            # Window overruns the epoch: borrow the overflow from the front.
            wrapped = [array[i] for i in self.shuffle_indices[0:end % total]]
            end = total
        tail = [array[i] for i in self.shuffle_indices[self.shuffle_start_index:end]]
        return wrapped + tail

    def get_batch(self, batch_size, postprocess = True):
        """Return the next ``(eng_batch, hindi_batch)`` and advance the cursor.

        ``postprocess`` is accepted for interface compatibility but is not
        used by this method.
        """
        eng_batch = self.get_batch_from_array(batch_size, self.eng_words)
        hindi_batch = self.get_batch_from_array(batch_size, self.hindi_words)
        # The slice end is exclusive, so the next batch starts exactly
        # batch_size further on; adding an extra 1 would skip a sample
        # after every batch.
        self.shuffle_start_index += batch_size
        # Reshuffle if 1 epoch is complete
        if self.shuffle_start_index >= len(self.eng_words):
            random.shuffle(self.shuffle_indices)
            self.shuffle_start_index = 0
        return eng_batch, hindi_batch
PLEASE HELP!
1 Like