2013-10-31 15:23:42,304 INFO platform.PlatformRunner (PlatformRunner.java:getPlatformClassesFromClasspath(187)) - found 1 test platforms from classpath
2013-10-31 15:23:42,322 INFO platform.PlatformRunner (PlatformRunner.java:addPlatform(237)) - installing platform: hadoop
2013-10-31 15:23:42,323 INFO platform.PlatformRunner (PlatformRunner.java:addPlatform(238)) - running test: cascading.DistanceUseCasePlatformTest
2013-10-31 15:23:42,342 INFO hadoop.HadoopPlatform (HadoopPlatform.java:setUp(112)) - not using cluster
2013-10-31 15:23:42,681 INFO util.HadoopUtil (HadoopUtil.java:findMainClass(319)) - using default application jar, may cause class not found exceptions on the cluster
2013-10-31 15:23:42,682 INFO planner.HadoopPlanner (HadoopPlanner.java:initialize(203)) - using application jar: /home/hadoop/.gradle/caches/artifacts-26/filestore/cascading/cascading-hadoop/2.2.0/jar/c7b52bd4c6da523ebde0a5d6eca2475123783db2/cascading-hadoop-2.2.0.jar
2013-10-31 15:23:42,693 INFO property.AppProps (AppProps.java:getAppID(162)) - using app.id: DA2015CDEA6F4DF59CCA8D7592DD44F0
2013-10-31 15:23:42,897 INFO util.Version (Version.java:printBanner(78)) - Concurrent, Inc - Cascading 2.2.0
2013-10-31 15:23:42,899 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] starting
2013-10-31 15:23:42,900 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] source: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:23:42,900 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] sink: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/composite"]
2013-10-31 15:23:42,900 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] parallel execution is enabled: false
2013-10-31 15:23:42,900 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] starting jobs: 3
2013-10-31 15:23:42,900 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] allocating threads: 1
2013-10-31 15:23:42,901 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (1/3)
2013-10-31 15:23:42,928 WARN util.NativeCodeLoader (NativeCodeLoader.java:<clinit>(52)) - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2013-10-31 15:23:42,933 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:23:42,933 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:23:42,957 INFO security.ShellBasedUnixGroupsMapping (ShellBasedUnixGroupsMapping.java:getUnixGroups(89)) - add hadoop to shell userGroupsCache
2013-10-31 15:23:42,964 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:23:42,980 WARN snappy.LoadSnappy (LoadSnappy.java:<clinit>(46)) - Snappy native library not loaded
2013-10-31 15:23:42,984 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:23:43,191 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0001
2013-10-31 15:23:43,222 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:43,230 INFO util.ProcessTree (ProcessTree.java:isSetsidSupported(63)) - setsid exited with exit code 0
2013-10-31 15:23:43,233 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@25e1e5e3
2013-10-31 15:23:43,238 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt
2013-10-31 15:23:43,243 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:23:43,249 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:23:43,291 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:23:43,291 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:23:43,311 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:23:43,311 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:23:43,377 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:23:43,377 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: CoGroup(euclidean)[by:[{1}:'l'][numSelfJoins:1]]
2013-10-31 15:23:43,393 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:23:43,403 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:23:43,405 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:46,226 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt:0+828
2013-10-31 15:23:46,227 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0001_m_000000_0' done.
2013-10-31 15:23:46,242 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:46,244 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@6f28deae
2013-10-31 15:23:46,245 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:23:46,255 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:23:46,257 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 1818 bytes
2013-10-31 15:23:46,257 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:23:46,258 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:23:46,259 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:23:46,276 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: CoGroup(euclidean)[by:[{1}:'l'][numSelfJoins:1]]
2013-10-31 15:23:46,276 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][3839691910/euclidean/]
2013-10-31 15:23:46,296 INFO collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(105)) - attempting to load codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:23:46,296 INFO collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(110)) - found codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:23:46,339 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:46,340 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:23:46,341 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0001_r_000000_0 is allowed to commit now
2013-10-31 15:23:46,343 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0001_r_000000_0' to file:/tmp/hadoop-hadoop/3839691910_euclidean_C0846ABE38034977BE9B391FA214A42A
2013-10-31 15:23:49,244 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:23:49,246 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0001_r_000000_0' done.
2013-10-31 15:23:49,260 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (2/3)
2013-10-31 15:23:49,264 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:23:49,265 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:23:49,278 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:23:49,281 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:23:49,336 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0002
2013-10-31 15:23:49,339 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:49,340 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@59fc982f
2013-10-31 15:23:49,341 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/3839691910_euclidean_C0846ABE38034977BE9B391FA214A42A/part-00000
2013-10-31 15:23:49,346 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:23:49,347 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:23:49,388 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:23:49,388 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:23:49,389 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:23:49,390 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:23:49,400 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][3839691910/euclidean/]
2013-10-31 15:23:49,400 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:23:49,439 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:23:49,473 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:23:49,475 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0002_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:52,341 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/3839691910_euclidean_C0846ABE38034977BE9B391FA214A42A/part-00000:0+13816
2013-10-31 15:23:52,343 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0002_m_000000_0' done.
2013-10-31 15:23:52,348 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:52,351 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@2cb10995
2013-10-31 15:23:52,351 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:23:52,354 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:23:52,355 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 13454 bytes
2013-10-31 15:23:52,355 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:23:52,357 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:23:52,357 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:23:52,378 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:23:52,378 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][6500997619/euclidean/]
2013-10-31 15:23:52,401 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0002_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:52,402 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:23:52,403 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0002_r_000000_0 is allowed to commit now
2013-10-31 15:23:52,404 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0002_r_000000_0' to file:/tmp/hadoop-hadoop/6500997619_euclidean_A94081EE9B6F445982476D4B3E68E0EE
2013-10-31 15:23:55,350 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:23:55,352 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0002_r_000000_0' done.
2013-10-31 15:23:55,361 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (3/3) ...tform/euclidean/composite
2013-10-31 15:23:55,365 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:23:55,365 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:23:55,383 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:23:55,389 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:23:55,432 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0003
2013-10-31 15:23:55,434 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:55,436 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@3feb908d
2013-10-31 15:23:55,438 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/6500997619_euclidean_A94081EE9B6F445982476D4B3E68E0EE/part-00000
2013-10-31 15:23:55,441 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:23:55,441 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:23:55,520 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:23:55,520 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:23:55,522 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:23:55,522 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:23:55,538 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][6500997619/euclidean/]
2013-10-31 15:23:55,538 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{2}:'n1', 'n2']]
2013-10-31 15:23:55,552 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:23:55,560 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:23:55,561 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0003_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:58,436 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/6500997619_euclidean_A94081EE9B6F445982476D4B3E68E0EE/part-00000:0+6943
2013-10-31 15:23:58,438 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0003_m_000000_0' done.
2013-10-31 15:23:58,443 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:23:58,445 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@95ac5d5
2013-10-31 15:23:58,445 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:23:58,447 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:23:58,448 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 6467 bytes
2013-10-31 15:23:58,448 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:23:58,450 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:23:58,450 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:23:58,462 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{2}:'n1', 'n2']]
2013-10-31 15:23:58,462 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/composite"]
2013-10-31 15:23:58,474 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0003_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:23:58,474 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:23:58,475 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0003_r_000000_0 is allowed to commit now
2013-10-31 15:23:58,477 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0003_r_000000_0' to file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/composite
2013-10-31 15:24:01,445 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:01,447 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0003_r_000000_0' done.
2013-10-31 15:24:01,460 INFO util.Hadoop18TapUtil (Hadoop18TapUtil.java:cleanTempPath(219)) - deleting temp path /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/composite/_temporary
2013-10-31 15:24:01,496 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:01,511 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:01,512 INFO cascading.PlatformTestCase (PlatformTestCase.java:tearDown(209)) - copying to local /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output//hadoop/distanceusecaseplatform/euclidean/composite
2013-10-31 15:24:01,529 INFO util.HadoopUtil (HadoopUtil.java:findMainClass(319)) - using default application jar, may cause class not found exceptions on the cluster
2013-10-31 15:24:01,529 INFO planner.HadoopPlanner (HadoopPlanner.java:initialize(203)) - using application jar: /home/hadoop/.gradle/caches/artifacts-26/filestore/cascading/cascading-hadoop/2.2.0/jar/c7b52bd4c6da523ebde0a5d6eca2475123783db2/cascading-hadoop-2.2.0.jar
2013-10-31 15:24:01,580 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson] starting
2013-10-31 15:24:01,580 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson] source: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:01,580 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson] sink: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/pearson/composite"]
2013-10-31 15:24:01,581 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson] parallel execution is enabled: false
2013-10-31 15:24:01,581 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson] starting jobs: 3
2013-10-31 15:24:01,581 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [pearson] allocating threads: 1
2013-10-31 15:24:01,581 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] starting step: (1/3)
2013-10-31 15:24:01,583 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:01,583 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:01,590 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:01,592 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:01,624 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] submitted hadoop job: job_local_0004
2013-10-31 15:24:01,626 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:01,627 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@509dcbbb
2013-10-31 15:24:01,628 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt
2013-10-31 15:24:01,629 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:01,629 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:01,648 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:01,648 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:01,649 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:01,649 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:24:01,659 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:01,659 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: CoGroup(pearson)[by:[{1}:'l'][numSelfJoins:1]]
2013-10-31 15:24:01,663 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:01,665 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:01,667 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0004_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:04,629 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt:0+828
2013-10-31 15:24:04,630 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0004_m_000000_0' done.
2013-10-31 15:24:04,635 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:04,637 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@2379fda4
2013-10-31 15:24:04,637 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:04,639 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:04,640 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 1818 bytes
2013-10-31 15:24:04,640 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:04,641 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:04,642 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:24:04,659 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: CoGroup(pearson)[by:[{1}:'l'][numSelfJoins:1]]
2013-10-31 15:24:04,659 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][992065113/pearson/]
2013-10-31 15:24:04,662 INFO collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(105)) - attempting to load codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:04,662 INFO collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(110)) - found codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:04,685 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0004_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:04,685 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:04,686 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0004_r_000000_0 is allowed to commit now
2013-10-31 15:24:04,687 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0004_r_000000_0' to file:/tmp/hadoop-hadoop/992065113_pearson_A68FB44274DB45FAA3ED835AA45AF484
2013-10-31 15:24:07,636 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:07,638 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0004_r_000000_0' done.
2013-10-31 15:24:07,645 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] starting step: (2/3)
2013-10-31 15:24:07,650 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:07,651 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:07,667 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:07,672 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:07,700 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] submitted hadoop job: job_local_0005
2013-10-31 15:24:07,702 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:07,703 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@4e2b5fab
2013-10-31 15:24:07,704 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/992065113_pearson_A68FB44274DB45FAA3ED835AA45AF484/part-00000
2013-10-31 15:24:07,705 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:07,705 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:07,746 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:07,747 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:07,748 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:07,748 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:24:07,755 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][992065113/pearson/]
2013-10-31 15:24:07,756 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(pearson)[by:[{?}:ALL]]
2013-10-31 15:24:07,766 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:07,777 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:07,778 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0005_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:10,704 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/992065113_pearson_A68FB44274DB45FAA3ED835AA45AF484/part-00000:0+13816
2013-10-31 15:24:10,706 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0005_m_000000_0' done.
2013-10-31 15:24:10,710 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:10,712 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@c7d2c2b
2013-10-31 15:24:10,712 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:10,714 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:10,715 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 13454 bytes
2013-10-31 15:24:10,715 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:10,716 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:10,716 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:24:10,723 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(pearson)[by:[{?}:ALL]]
2013-10-31 15:24:10,723 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][920026181/pearson/]
2013-10-31 15:24:10,737 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0005_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:10,737 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:10,738 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0005_r_000000_0 is allowed to commit now
2013-10-31 15:24:10,739 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0005_r_000000_0' to file:/tmp/hadoop-hadoop/920026181_pearson_8CC62A8EF333437F8EE6D3DD00F5BFB1
2013-10-31 15:24:13,712 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:13,713 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0005_r_000000_0' done.
2013-10-31 15:24:15,290 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] starting step: (3/3) ...latform/pearson/composite
2013-10-31 15:24:15,294 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:15,295 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:15,311 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:15,314 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:15,343 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [pearson] submitted hadoop job: job_local_0006
2013-10-31 15:24:15,348 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:15,350 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@40bde86b
2013-10-31 15:24:15,352 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/920026181_pearson_8CC62A8EF333437F8EE6D3DD00F5BFB1/part-00000
2013-10-31 15:24:15,354 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:15,354 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:15,396 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:15,396 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:15,397 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:15,397 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:24:15,405 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['l', 'n1', 'v1', 'n2', 'v2']]"][920026181/pearson/]
2013-10-31 15:24:15,405 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(pearson)[by:[{2}:'n1', 'n2']]
2013-10-31 15:24:15,410 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:15,413 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:15,415 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0006_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:18,351 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/920026181_pearson_8CC62A8EF333437F8EE6D3DD00F5BFB1/part-00000:0+6943
2013-10-31 15:24:18,352 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0006_m_000000_0' done.
2013-10-31 15:24:18,357 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:18,358 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@5ea7f1b1
2013-10-31 15:24:18,359 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:18,361 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:18,361 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 6467 bytes
2013-10-31 15:24:18,362 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:18,363 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:18,363 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:24:18,377 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(pearson)[by:[{2}:'n1', 'n2']]
2013-10-31 15:24:18,378 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/pearson/composite"]
2013-10-31 15:24:18,483 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0006_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:18,484 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:18,486 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0006_r_000000_0 is allowed to commit now
2013-10-31 15:24:18,523 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0006_r_000000_0' to file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/pearson/composite
2013-10-31 15:24:21,358 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:21,360 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0006_r_000000_0' done.
2013-10-31 15:24:21,368 INFO util.Hadoop18TapUtil (Hadoop18TapUtil.java:cleanTempPath(219)) - deleting temp path /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/pearson/composite/_temporary
2013-10-31 15:24:21,391 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:21,403 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:21,404 INFO cascading.PlatformTestCase (PlatformTestCase.java:tearDown(209)) - copying to local /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output//hadoop/distanceusecaseplatform/pearson/composite
2013-10-31 15:24:21,415 INFO util.HadoopUtil (HadoopUtil.java:findMainClass(319)) - using default application jar, may cause class not found exceptions on the cluster
2013-10-31 15:24:21,415 INFO planner.HadoopPlanner (HadoopPlanner.java:initialize(203)) - using application jar: /home/hadoop/.gradle/caches/artifacts-26/filestore/cascading/cascading-hadoop/2.2.0/jar/c7b52bd4c6da523ebde0a5d6eca2475123783db2/cascading-hadoop-2.2.0.jar
2013-10-31 15:24:21,462 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] starting
2013-10-31 15:24:21,463 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] source: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:21,463 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] sink: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/long"]
2013-10-31 15:24:21,463 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] parallel execution is enabled: false
2013-10-31 15:24:21,463 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] starting jobs: 3
2013-10-31 15:24:21,463 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] allocating threads: 1
2013-10-31 15:24:21,464 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (1/3)
2013-10-31 15:24:21,468 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:21,469 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:21,478 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:21,480 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:21,503 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0007
2013-10-31 15:24:21,507 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:21,509 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@3f025aba
2013-10-31 15:24:21,511 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt
2013-10-31 15:24:21,513 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:21,513 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:21,547 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:21,548 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:21,554 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:21,554 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:24:21,562 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:21,562 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: CoGroup(euclidean)[by:[{1}:'movie'][numSelfJoins:1]]
2013-10-31 15:24:21,565 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:21,566 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:21,568 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0007_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:24,510 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt:0+828
2013-10-31 15:24:24,511 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0007_m_000000_0' done.
2013-10-31 15:24:24,516 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:24,517 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@59b4ba80
2013-10-31 15:24:24,518 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:24,520 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:24,520 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 1818 bytes
2013-10-31 15:24:24,521 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:24,522 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:24,522 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:24:24,536 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: CoGroup(euclidean)[by:[{1}:'movie'][numSelfJoins:1]]
2013-10-31 15:24:24,536 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2']]"][1918939469/euclidean/]
2013-10-31 15:24:24,539 INFO collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(105)) - attempting to load codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:24,539 INFO collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(110)) - found codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:24,556 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0007_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:24,556 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:24,557 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0007_r_000000_0 is allowed to commit now
2013-10-31 15:24:24,558 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0007_r_000000_0' to file:/tmp/hadoop-hadoop/1918939469_euclidean_525A16B2C8C44B0D8960271BC5547C64
2013-10-31 15:24:27,517 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:27,519 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0007_r_000000_0' done.
2013-10-31 15:24:27,529 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (2/3)
2013-10-31 15:24:27,533 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:27,534 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:27,547 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:27,551 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:27,575 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0008
2013-10-31 15:24:27,579 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:27,581 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@31e4c806
2013-10-31 15:24:27,583 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/1918939469_euclidean_525A16B2C8C44B0D8960271BC5547C64/part-00000
2013-10-31 15:24:27,585 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:27,586 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:27,611 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:27,613 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:27,622 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:27,622 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:24:27,628 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2']]"][1918939469/euclidean/]
2013-10-31 15:24:27,628 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:24:27,635 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:27,643 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:27,643 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0008_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:30,581 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/1918939469_euclidean_525A16B2C8C44B0D8960271BC5547C64/part-00000:0+13816
2013-10-31 15:24:30,582 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0008_m_000000_0' done.
2013-10-31 15:24:30,584 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:30,584 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@66dab355
2013-10-31 15:24:30,584 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:30,585 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:30,585 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 13454 bytes
2013-10-31 15:24:30,585 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:30,586 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:30,586 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:24:30,592 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:24:30,592 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2', 'score']]"][1388883191/euclidean/]
2013-10-31 15:24:30,607 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0008_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:30,607 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:30,608 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0008_r_000000_0 is allowed to commit now
2013-10-31 15:24:30,609 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0008_r_000000_0' to file:/tmp/hadoop-hadoop/1388883191_euclidean_442F50A935FB450BA740526685D6A7FE
2013-10-31 15:24:33,585 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:33,586 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0008_r_000000_0' done.
2013-10-31 15:24:33,593 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (3/3) ...seplatform/euclidean/long
2013-10-31 15:24:33,597 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:33,597 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:33,610 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:33,614 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:33,632 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0009
2013-10-31 15:24:33,636 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:33,637 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@3f9b4e74
2013-10-31 15:24:33,638 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/1388883191_euclidean_442F50A935FB450BA740526685D6A7FE/part-00000
2013-10-31 15:24:33,639 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:33,640 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:33,669 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:33,670 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:33,671 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:33,671 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:24:33,679 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2', 'score']]"][1388883191/euclidean/]
2013-10-31 15:24:33,679 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{2}:'name1', 'name2']]
2013-10-31 15:24:33,688 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:33,692 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:33,694 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0009_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:36,638 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/1388883191_euclidean_442F50A935FB450BA740526685D6A7FE/part-00000:0+7726
2013-10-31 15:24:36,639 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0009_m_000000_0' done.
2013-10-31 15:24:36,644 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:36,646 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@82c0480
2013-10-31 15:24:36,646 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:36,648 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:36,649 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 7250 bytes
2013-10-31 15:24:36,649 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:36,650 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:36,651 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:24:36,660 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{2}:'name1', 'name2']]
2013-10-31 15:24:36,661 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/long"]
2013-10-31 15:24:36,670 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0009_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:36,670 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:36,671 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0009_r_000000_0 is allowed to commit now
2013-10-31 15:24:36,672 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0009_r_000000_0' to file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/long
2013-10-31 15:24:39,646 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:39,648 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0009_r_000000_0' done.
2013-10-31 15:24:39,661 INFO util.Hadoop18TapUtil (Hadoop18TapUtil.java:cleanTempPath(219)) - deleting temp path /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/long/_temporary
2013-10-31 15:24:39,681 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:39,698 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:39,699 INFO cascading.PlatformTestCase (PlatformTestCase.java:tearDown(209)) - copying to local /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output//hadoop/distanceusecaseplatform/euclidean/long
2013-10-31 15:24:39,708 INFO util.HadoopUtil (HadoopUtil.java:findMainClass(319)) - using default application jar, may cause class not found exceptions on the cluster
2013-10-31 15:24:39,708 INFO planner.HadoopPlanner (HadoopPlanner.java:initialize(203)) - using application jar: /home/hadoop/.gradle/caches/artifacts-26/filestore/cascading/cascading-hadoop/2.2.0/jar/c7b52bd4c6da523ebde0a5d6eca2475123783db2/cascading-hadoop-2.2.0.jar
2013-10-31 15:24:39,745 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] starting
2013-10-31 15:24:39,746 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] source: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:39,746 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] sink: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/short"]
2013-10-31 15:24:39,747 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] parallel execution is enabled: false
2013-10-31 15:24:39,747 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] starting jobs: 3
2013-10-31 15:24:39,747 INFO flow.Flow (BaseFlow.java:logInfo(1306)) - [euclidean] allocating threads: 1
2013-10-31 15:24:39,748 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (1/3)
2013-10-31 15:24:39,752 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:39,753 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:39,766 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:39,769 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:39,790 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0010
2013-10-31 15:24:39,794 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:39,796 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@34fdf528
2013-10-31 15:24:39,798 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt
2013-10-31 15:24:39,800 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:39,801 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:39,831 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:39,832 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:39,833 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:39,833 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:24:39,840 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: Hfs["TextLine[['offset', 'line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt"]
2013-10-31 15:24:39,840 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: CoGroup(euclidean)[by:[{1}:'movie'][numSelfJoins:1]]
2013-10-31 15:24:39,842 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:39,844 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:39,845 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0010_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:42,797 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/classes/test/data/critics.txt:0+828
2013-10-31 15:24:42,798 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0010_m_000000_0' done.
2013-10-31 15:24:42,803 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:42,805 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@fde0a96
2013-10-31 15:24:42,805 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:42,808 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:42,809 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 1818 bytes
2013-10-31 15:24:42,809 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:42,810 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:42,811 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:24:42,820 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: CoGroup(euclidean)[by:[{1}:'movie'][numSelfJoins:1]]
2013-10-31 15:24:42,820 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2']]"][1676381248/euclidean/]
2013-10-31 15:24:42,823 INFO collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(105)) - attempting to load codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:42,823 INFO collect.SpillableTupleList (SpillableTupleList.java:getCodecClass(110)) - found codec: org.apache.hadoop.io.compress.GzipCodec
2013-10-31 15:24:42,840 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0010_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:42,840 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:42,841 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0010_r_000000_0 is allowed to commit now
2013-10-31 15:24:42,843 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0010_r_000000_0' to file:/tmp/hadoop-hadoop/1676381248_euclidean_39667152B5F44457B3895155EA3524C9
2013-10-31 15:24:45,804 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:45,806 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0010_r_000000_0' done.
2013-10-31 15:24:45,812 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (2/3)
2013-10-31 15:24:45,816 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:45,817 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:45,830 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:45,834 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:45,852 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0011
2013-10-31 15:24:45,854 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:45,855 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@9b64fac
2013-10-31 15:24:45,856 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/1676381248_euclidean_39667152B5F44457B3895155EA3524C9/part-00000
2013-10-31 15:24:45,856 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:45,857 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:45,880 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:45,880 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:45,881 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:45,881 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:24:45,887 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2']]"][1676381248/euclidean/]
2013-10-31 15:24:45,887 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:24:45,893 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:45,898 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:45,899 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0011_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:48,856 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/1676381248_euclidean_39667152B5F44457B3895155EA3524C9/part-00000:0+13816
2013-10-31 15:24:48,857 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0011_m_000000_0' done.
2013-10-31 15:24:48,861 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:48,863 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@13515ded
2013-10-31 15:24:48,863 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:48,865 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:48,866 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 13454 bytes
2013-10-31 15:24:48,866 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:48,867 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:48,868 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:24:48,878 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{?}:ALL]]
2013-10-31 15:24:48,878 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2', 'score']]"][6369659947/euclidean/]
2013-10-31 15:24:48,893 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0011_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:48,894 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:48,894 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0011_r_000000_0 is allowed to commit now
2013-10-31 15:24:48,896 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0011_r_000000_0' to file:/tmp/hadoop-hadoop/6369659947_euclidean_D8605651C8F7468EABC0EE5A294460CC
2013-10-31 15:24:51,863 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:51,864 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0011_r_000000_0' done.
2013-10-31 15:24:51,871 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] starting step: (3/3) ...eplatform/euclidean/short
2013-10-31 15:24:51,874 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(825)) - Default number of map tasks: 1
2013-10-31 15:24:51,875 INFO mapred.JobClient (JobClient.java:setNumTasksBasedOnClusterCapacity(832)) - Default number of reduce tasks: 1
2013-10-31 15:24:51,888 INFO mapred.JobClient (JobClient.java:copyAndConfigureFiles(794)) - Setting group to hadoop
2013-10-31 15:24:51,892 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:51,910 INFO flow.FlowStep (BaseFlowStep.java:logInfo(742)) - [euclidean] submitted hadoop job: job_local_0012
2013-10-31 15:24:51,913 INFO mapred.MapTask (MapTask.java:run(352)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:51,913 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@22c5967f
2013-10-31 15:24:51,914 INFO io.MultiInputSplit (MultiInputSplit.java:readFields(161)) - current split input path: file:/tmp/hadoop-hadoop/6369659947_euclidean_D8605651C8F7468EABC0EE5A294460CC/part-00000
2013-10-31 15:24:51,916 INFO mapred.MapTask (MapTask.java:runOldMapper(430)) - numReduceTasks: 1
2013-10-31 15:24:51,916 INFO mapred.MapTask (MapTask.java:<init>(960)) - io.sort.mb = 100
2013-10-31 15:24:51,951 INFO mapred.MapTask (MapTask.java:<init>(972)) - data buffer = 79691776/99614720
2013-10-31 15:24:51,951 INFO mapred.MapTask (MapTask.java:<init>(973)) - record buffer = 262144/327680
2013-10-31 15:24:51,952 INFO hadoop.FlowMapper (FlowMapper.java:configure(70)) - cascading version: 2.2.0
2013-10-31 15:24:51,952 INFO hadoop.FlowMapper (FlowMapper.java:configure(71)) - child jvm opts:
2013-10-31 15:24:51,958 INFO hadoop.FlowMapper (FlowMapper.java:configure(86)) - sourcing from: TempHfs["SequenceFile[['movie', 'name1', 'rate1', 'name2', 'rate2', 'score']]"][6369659947/euclidean/]
2013-10-31 15:24:51,958 INFO hadoop.FlowMapper (FlowMapper.java:configure(89)) - sinking to: GroupBy(euclidean)[by:[{2}:'name1', 'name2']]
2013-10-31 15:24:51,962 INFO mapred.MapTask (MapTask.java:flush(1300)) - Starting flush of map output
2013-10-31 15:24:51,965 INFO mapred.MapTask (MapTask.java:sortAndSpill(1484)) - Finished spill 0
2013-10-31 15:24:51,966 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0012_m_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:54,914 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - file:/tmp/hadoop-hadoop/6369659947_euclidean_D8605651C8F7468EABC0EE5A294460CC/part-00000:0+7726
2013-10-31 15:24:54,915 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0012_m_000000_0' done.
2013-10-31 15:24:54,920 INFO mapred.ReduceTask (ReduceTask.java:run(360)) - Host name: ip-10-5-128-102.ec2.internal
2013-10-31 15:24:54,921 INFO mapred.Task (Task.java:initialize(534)) - Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@65ae615a
2013-10-31 15:24:54,922 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:54,923 INFO mapred.Merger (Merger.java:merge(390)) - Merging 1 sorted segments
2013-10-31 15:24:54,924 INFO mapred.Merger (Merger.java:merge(473)) - Down to the last merge-pass, with 1 segments left of total size: 7250 bytes
2013-10-31 15:24:54,924 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:54,925 INFO hadoop.FlowReducer (FlowReducer.java:configure(77)) - cascading version: 2.2.0
2013-10-31 15:24:54,926 INFO hadoop.FlowReducer (FlowReducer.java:configure(78)) - child jvm opts:
2013-10-31 15:24:54,938 INFO hadoop.FlowReducer (FlowReducer.java:configure(96)) - sourcing from: GroupBy(euclidean)[by:[{2}:'name1', 'name2']]
2013-10-31 15:24:54,938 INFO hadoop.FlowReducer (FlowReducer.java:configure(99)) - sinking to: Hfs["TextLine[['line']->[ALL]]"]["/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/short"]
2013-10-31 15:24:54,945 INFO mapred.Task (Task.java:done(852)) - Task:attempt_local_0012_r_000000_0 is done. And is in the process of commiting
2013-10-31 15:24:54,945 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) -
2013-10-31 15:24:54,945 INFO mapred.Task (Task.java:commit(1005)) - Task attempt_local_0012_r_000000_0 is allowed to commit now
2013-10-31 15:24:54,947 INFO mapred.FileOutputCommitter (FileOutputCommitter.java:commitTask(138)) - Saved output of task 'attempt_local_0012_r_000000_0' to file:/home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/short
2013-10-31 15:24:57,921 INFO mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(321)) - reduce > reduce
2013-10-31 15:24:57,922 INFO mapred.Task (Task.java:sendDone(964)) - Task 'attempt_local_0012_r_000000_0' done.
2013-10-31 15:24:57,931 INFO util.Hadoop18TapUtil (Hadoop18TapUtil.java:cleanTempPath(219)) - deleting temp path /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output/hadoop/distanceusecaseplatform/euclidean/short/_temporary
2013-10-31 15:24:57,950 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:57,961 INFO mapred.FileInputFormat (FileInputFormat.java:listStatus(199)) - Total input paths to process : 1
2013-10-31 15:24:57,963 INFO cascading.PlatformTestCase (PlatformTestCase.java:tearDown(209)) - copying to local /home/hadoop/cascading.compatibility/amazon-emr-2.4.1/build/test/output//hadoop/distanceusecaseplatform/euclidean/short