Development Exercises – L5.1

Created by Ramses Alexander Coraspe Valdez

Created on March 15, 2020

In [0]:
import math
import random
import csv
import timeit as tiempo
import pandas as pd
import unittest
In [0]:
class SortingAlgorithms:

        def __init__(self):
            self.fileinputpath = None
            self.fileoutputpath = None
            self.number_records = 0
            self.time_consumed = 0
            self.start_time = 0
            self.end_time = 0
            self.values = []
            self.outvalues = []

        def set_input_data(self, file_path_name):
            self.fileinputpath = file_path_name
            self.__set_values()

        def __set_values(self):
            try:
                self.values = []
                with open(self.fileinputpath) as f:
                    data = [list(map(int, rec))
                            for rec in csv.reader(f, delimiter=',')]
                    self.values = data[0]
                    self.number_records = len(self.values)
            except Exception as ex:
                raise ex

        def get_values(self):
            return self.values

        def set_output_data(self, file_path_name):
            self.fileoutputpath = file_path_name
            self.__set_out_values()

        def __set_out_values(self):
            try:
                with open(self.fileoutputpath, 'w', newline='') as csvfile:
                    writer = csv.writer(csvfile)
                    writer.writerow(self.outvalues)
            except Exception as ex:
                raise ex

        def __execute_merge_sort(self, a):
            if len(a) > 1:
                middle = len(a)//2
                l = a[:middle]
                r = a[middle:]
                self.__execute_merge_sort(l)
                self.__execute_merge_sort(r)
                x = y = z = 0
                while x < len(l) and y < len(r):
                    if l[x] < r[y]:
                        a[z] = l[x]
                        x += 1
                    else:
                        a[z] = r[y]
                        y += 1
                    z += 1
                while x < len(l):
                    a[z] = l[x]
                    x += 1
                    z += 1
                while y < len(r):
                    a[z] = r[y]
                    y += 1
                    z += 1

        def execute_merge_sort(self):
            self.start_time = tiempo.default_timer()
            self.outvalues = self.values
            self.__execute_merge_sort(self.outvalues)
            self.end_time = tiempo.default_timer()
            self.time_consumed = format(self.end_time-self.start_time, '.8f')

        def __heapify(self, a, n, i):
            largest = i
            l = 2 * i + 1
            r = 2 * i + 2
            if l < n and a[i] < a[l]:
                largest = l
            if r < n and a[largest] < a[r]:
                largest = r
            if largest != i:
                a[i], a[largest] = a[largest], a[i]
                self.__heapify(a, n, largest)

        def __execute_heap_sort(self, a):
            n = len(a)
            for i in range(n, -1, -1):
                self.__heapify(a, n, i)
            for i in range(n-1, 0, -1):
                a[i], a[0] = a[0], a[i]
                self.__heapify(a, i, 0)

        def execute_heap_sort(self):
            self.start_time = tiempo.default_timer()
            self.outvalues = self.values
            self.__execute_heap_sort(self.outvalues)
            self.end_time = tiempo.default_timer()
            self.time_consumed = format(self.end_time-self.start_time, '.8f')

        def __partition(self, a, low, high):
            i = (low-1)
            pivot = a[high]
            for j in range(low, high):
                if a[j] < pivot:
                    i = i+1
                    a[i], a[j] = a[j], a[i]
            a[i+1], a[high] = a[high], a[i+1]
            return (i+1)

        def __execute_quick_sort(self, a, low, high):
            if low < high:
                pi = self.__partition(a, low, high)
                self.__execute_quick_sort(a, low, pi-1)
                self.__execute_quick_sort(a, pi+1, high)

        def execute_quick_sort(self):
            self.start_time = tiempo.default_timer()
            self.outvalues = self.values
            self.__execute_quick_sort(self.outvalues, 0, len(self.outvalues)-1)
            self.end_time = tiempo.default_timer()
            self.time_consumed = format(self.end_time-self.start_time, '.8f')

        def get_performance_data(self):
            return self.number_records, self.time_consumed, self.start_time, self.end_time

        def get_overall_stats(self):
            files = ["random_numbers_1", "random_numbers_2",
                     "random_numbers_3", "random_numbers_4",
                     "random_numbers_5"]
            Algorithms = ["Merge", "Heap", "Quick"]
            fl = []
            nr = []
            alg = []
            tc = []

            i = 0
            for a in Algorithms:
                for f in files:
                    sa = SortingAlgorithms()
                    sa.set_input_data(f + '.csv')
                    alg.append(a)
                    fl.append(f)
                    if a in 'Merge':
                        sa.execute_merge_sort()
                    elif a in 'Heap':
                        sa.execute_heap_sort()
                    else:
                        sa.execute_quick_sort()

                    sa.set_output_data(f + "_" + a)
                    r, t, st, et = sa.get_performance_data()
                    tc.append(t)
                    nr.append(r)
            results = pd.DataFrame(list(zip(alg, fl, nr, tc)),
                                   columns=['Algorithm',
                                            'File name',
                                            'Number records',
                                            'Time consumed'])
            return results
In [0]:
sa = SortingAlgorithms();
values= sa.get_overall_stats()
print(values)
   Algorithm         File name  Number records Time consumed
0      Merge  random_numbers_1             336    0.00093165
1      Merge  random_numbers_2            1126    0.00395886
2      Merge  random_numbers_3            5726    0.02513376
3      Merge  random_numbers_4          102726    0.55587225
4      Merge  random_numbers_5         1038013    7.14982969
5       Heap  random_numbers_1             336    0.00115140
6       Heap  random_numbers_2            1126    0.00493948
7       Heap  random_numbers_3            5726    0.03053414
8       Heap  random_numbers_4          102726    0.82854343
9       Heap  random_numbers_5         1038013   11.78698254
10     Quick  random_numbers_1             336    0.00064551
11     Quick  random_numbers_2            1126    0.00325437
12     Quick  random_numbers_3            5726    0.01554479
13     Quick  random_numbers_4          102726    0.46815197
14     Quick  random_numbers_5         1038013    4.65728874

Conclusions

The Quick Sort algorithm is too fast, almost half the time Merge sort algorithm uses. The tests were done using different files of different sizes, from thousands to a million numbers.

texto alternativo

The unit tests for the code are in the following url: Unit Test

The results of the coverage for the unit tests are in the following url: Coverage 98%