aboutsummaryrefslogtreecommitdiff
path: root/ch24/24_b_1.hs
blob: aa783c6aff6f8e6e46cb8725f065b65f4f9bd8ef (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
-- 1. It can be difficult to determine when to switch from parSort2 to sort. An
--    alternative approach to the one we outline above would be to decide based
--    on the length of a sublist. Rewrite parList2 so that it switches to sort
--    if the list contains more than some number of elements.
--
-- 2. Measure the performance of the length-based approach, and compare with the
--    depth approach. Which gives better performance results?


-- There is most probably a typo in the assignment. The
--   "if the list contains more than"
-- should be
--   "if the list contains less than".
--
-- parSort2 switches to sort if the level is deeper than a specified level. The
-- deeper the level is, the shorter the sublist is. It makes sense to use the
-- sequential sort for a short list because the overhead of creating threads
-- would be too high in proportion to just sequentially sorting the list.
--
-- partList2 should do that too (switching to sort if the list is shorter than
-- the threshold), it should just decide based on the length of a sublist.


-- Comparing performance of algorithms is not easy, especially for average cases
-- of inputs. Thus, instead of in-depth analysis and comparison I will just do a
-- few measurements and based on that I will set a general result. For the
-- comparison to make more sense, both of the implementations will run on the
-- same random input.


{-- From examples/examples/ch24/SortMain.hs and modified --}
module Main where

import Data.Time.Clock (diffUTCTime, getCurrentTime, NominalDiffTime)
import System.Environment (getArgs)
import System.Random (StdGen, getStdGen, randoms)

import Sorting

import Control.Parallel (par, pseq)

randomInts :: Int -> StdGen -> [Int]
randomInts k g = let result = take k (randoms g)
                 in force result `seq` result

main = do
  args <- getArgs
  let (inputSize, paramSort, paramList, avgRuns) =
        case args of
          (c:ps:pl:a:_) -> (read c, read ps, read pl, read a)
          _ -> error "Missing arguments"
  input <- randomInts inputSize `fmap` getStdGen
  putStrLn $ "We have " ++ show (length input) ++ " elements to sort."

  s <- timedAvg (parSort2 paramSort) input avgRuns
  putStrLn $ show s

  t <- timedAvg (parList2 paramList inputSize) input avgRuns
  putStrLn $ show t

timedAvg :: ([Int] -> [Int]) -> [Int] -> Int -> IO NominalDiffTime
timedAvg sortFunc input runCount = do
  times <- mapM (\_ -> timed sortFunc input) [1..runCount]
  return $ (sum times) / (fromIntegral (length times))

timed :: ([Int] -> [Int]) -> [Int] -> IO NominalDiffTime
timed sortFunc input = do
  start <- getCurrentTime
  let sorted = sortFunc input
  putStr $ "Sorted all " ++ show (length sorted) ++ " in "
  end <- getCurrentTime
  let diff = end `diffUTCTime` start
  putStrLn $ show diff
  return $ diff
{-- End of code from examples --}

parList2 :: (Ord a) => Int -> Int -> [a] -> [a]
parList2 l min list@(x:xs)
  | l < min   = sort list
  | otherwise = force greater `par` (force lesser `pseq`
                                      (lesser ++ x:greater))
  where
        le      = [y | y <- xs, y <  x]
        gr      = [y | y <- xs, y >= x]
        leLen   = length le
        grLen   = length gr
        lesser  = parList2 leLen min le
        greater = parList2 grLen min gr
parList2 _ _ _ = []


-- Choice of pivot for Quicksort is very important. In the parSort2
-- implementation from the examples, the pivot is the first item in a list to
-- sort.
--
-- This means, a sorted list is the worst case. For ascending order, the
-- 'lesser' array will always contain no elements, the 'greater' list will
-- always contain all the elements. Thus, the sorting will be slowest (time
-- complexity O(n^2)). Similarly for descending order.
--
-- The best case is when the first item in the list is the middle value, it
-- means, when half of the items in the list are smaller and half of them bigger
-- than the middle value. This results in a balanced split of the sorted list
-- and the sorting depth will be log(n) (time complexity O(n log(n))).


-- My hypothesis was that for the best case, parSort2 and parList2 would perform
-- similarly because a depth limit for parSort2 can be converted into a list
-- length limit for parList2, so both would be switching to the sequential
-- 'sort' at the same time.
--
-- The closest the input list would be to the worst case, the more difference
-- would there be. parList2 would perform much better thanks to fewer
-- applications of 'par'.
--
-- After running a few worst case experiments I found out that the quadratic
-- time complexity by far outweighs the speedup gained from reducing
-- applications of 'par', so I used only average/random inputs.


-- I compiled the source with
--   stack ghc -- -threaded 24_b_1.hs

-- My computer has 4 cores. The command I used for running the sort was
--   ./24_b_1 +RTS -N4 -RTS <INPUT_SIZE> <PARSORT_PARAM> <PARLIST_PARAM> <AVG_RUNS>
--
-- where INPUT_SIZE is number of elements of the list to sort, PARSORT_PARAM is
-- the parameter to parSort2 (the threshold depth), PARLIST_PARAM is the
-- parameter to parList2 (the threshold length), and AVG_RUNS is number of runs
-- of each sort implementation for computing the average sorting time.

-- The input sizes tested were 4 000 000 and 400 000 elements. The testing was
-- automated using a shell script. I set the parameters by computing binary
-- logarithm from the size of the input. This is the max. depth of a balanced
-- binary tree, it means, max. depth for a best case list. In the result tables
-- below this computed parameter is in parentheses. Then I tested lower and a
-- few greater parameter values. For parSort2 the value is the depth, for
-- parList2 it is a length of a sublist + 1 (because of 'l < min' in the
-- implementation I wanted the switch to the sequential sort to happen also at
-- the threshold value).

-- The results for input size 4 000 000:
--
-- PARSORT_PARAM  PARLIST_PARAM   parSort2_time   parList2_time  is_parList2_faster
--            26       67108865  16.1435506608s  16.5742411336s
--            24       16777217  17.2344045572s  15.0797320632s  yes
--          (22)      (4194305)  16.747620797s   17.9526976908s
--            20        1048577  15.8036009976s  14.9289581244s  yes
--            18         262145  14.9604795514s  15.0083064052s
--            16          65537  13.5984633668s  14.8022182528s
--            14          16385  15.8516987842s  17.8260459492s
--            12           4097  15.4590705512s  15.6482811018s
--            10           1025  13.8612868966s  14.5261572668s
--             8            257  13.1958234944s  14.6600659704s
--             6             65  15.4553144422s  14.8244605894s  yes
--             4             17  13.5678723824s  15.7817085602s
--             2              5  14.6244923772s  15.7249399864s
--             0              2  15.7628948616s  15.7540970314s  yes

-- The results for input size 400 000:
--
-- PARSORT_PARAM  PARLIST_PARAM  parSort2_time  parList2_time  is_parList2_faster
--            26       67108865  0.808638032s   0.8980840812s
--            24       16777217  0.9464536416s  1.0021756256s
--          (22)      (4194305)  0.9558710392s  1.0741807468s
--            20        1048577  0.764964904s   0.9638306786s
--            18         262145  1.0843675078s  1.1163900494s
--            16          65537  0.92859031s    0.96355397s
--            14          16385  0.9129570608s  1.0464927062s
--            12           4097  1.1209530828s  1.1124485972s  yes
--            10           1025  0.8144274202s  0.987780161s
--             8            257  1.0196146734s  1.0743890808s
--             6             65  0.9790765378s  0.9524003582s  yes
--             4             17  0.8646994942s  0.902187454s
--             2              5  0.8896786326s  0.9591653616s
--             0              2  1.0028687866s  0.9903121374s  yes

-- Overall, parSort2 performs better than parList2.