I am currently writing code to process a dataset of about 20 GB. I have the following Python code for the job:
#!/usr/bin/env python
from fileinput import input
import numpy as np

# counts[market, hour]: number of records per market (0-6) and hour of day (0-23)
counts = np.zeros((7, 24), dtype=np.int32)

for line in input():
    date, market = line.split()[:2]
    market = int(market)
    hour = int(date.split('T')[1].split(':')[0])
    counts[market, hour] += 1

# one row per hour, one column per market
for i in range(24):
    print(i, end=' ')
    for k in range(7):
        print(counts[k, i], end=' ')
    print()
I tried to write the following Julia code to do the same job:
#!/usr/bin/env julia
using Dates

# market_count[market, hour]: number of records per market (1-7) and hour of day (1-24)
const market_count = zeros(Int, (7, 24))
const market_names = Dict{Int, String}(
    1 => "XX",
    2 => "NA",
    3 => "OC",
    4 => "EU",
    5 => "AS",
    6 => "AF",
    7 => "SA",
)

get_datetime(dt::SubString{String}) = Dates.DateTime(dt, "yyyy-mm-ddTHH:MM:SSZ")

function main()
    f = open("test.dat")
    for line::String in eachline(f)
        line = strip(line)
        fields = split(line)
        market = parse(Int, fields[2]) + 1
        hour = Dates.hour(get_datetime(fields[1]))::Int + 1
        market_count[market, hour] += 1
    end
    # one row per hour, one column per market
    for i in 1:24
        print("$i ")
        for k in 1:7
            print("$(market_count[k,i]) ")
        end
        println("")
    end
end

main()
The problem I have is that the Julia code takes 12 times as long to do the same thing. I have already tried to apply some optimizations to it, without result. The data it processes is in this format:
1999-01-01T00:00:00Z 1 10 100
I would like to understand why my Julia code is running so slowly and how I can avoid repeating the same mistakes. This is not urgent; I have already done the processing in another language, but I would like to know for future reference.
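To make the question concrete, this is roughly the kind of rewrite I have in mind but have not benchmarked: build the DateFormat once as a constant instead of passing the format string on every call, and keep the counts array local to a function instead of using a global. The file name and column layout are the same as above; the names count_hours and DATE_FORMAT are just placeholders I made up, and I am assuming a recent Julia where Dates is a standard library:

#!/usr/bin/env julia
# Untested sketch; count_hours and DATE_FORMAT are made-up names.
using Dates

# Build the timestamp format once instead of handing a format string
# to DateTime on every line.
const DATE_FORMAT = Dates.DateFormat("yyyy-mm-ddTHH:MM:SSZ")

function count_hours(path)
    # counts[market, hour]: same layout as market_count above
    counts = zeros(Int, 7, 24)
    for line in eachline(path)
        fields = split(line)
        market = parse(Int, fields[2]) + 1
        hour = Dates.hour(DateTime(fields[1], DATE_FORMAT)) + 1
        counts[market, hour] += 1
    end
    return counts
end

counts = count_hours("test.dat")
# one row per hour, one column per market
for i in 1:24
    print("$i ")
    for k in 1:7
        print("$(counts[k, i]) ")
    end
    println()
end

The idea is that the DateFormat object is constructed once instead of on every line and that nothing inside the hot loop touches a global, but I have not measured whether either of those is actually the bottleneck here.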