From eddf8b0d3e878ea4fa2616d2c4430e43c16ce458 Mon Sep 17 00:00:00 2001 From: Kui Liu Date: Tue, 7 Aug 2018 19:47:01 +0200 Subject: [PATCH] Reading fix patterns. --- inputData/1.txt | 730 ++++++++++++++++++ pom.xml | 8 +- .../MultiThreadTreeLoaderCluster.java | 1 + .../MultiThreadTreeLoaderCluster3.java | 64 +- src/main/java/edu/lu/uni/serval/Launcher.java | 2 +- .../edu/lu/uni/serval/PatternExtractor.java | 27 +- .../java/edu/lu/uni/serval/TreeToString.java | 47 ++ .../regroup/HierarchicalRegrouperForC.java | 34 +- 8 files changed, 860 insertions(+), 53 deletions(-) create mode 100644 inputData/1.txt create mode 100644 src/main/java/edu/lu/uni/serval/TreeToString.java diff --git a/inputData/1.txt b/inputData/1.txt new file mode 100644 index 0000000..46e4275 --- /dev/null +++ b/inputData/1.txt @@ -0,0 +1,730 @@ +https://github.com/IDPF/epubcheck.git +https://github.com/treasure-data/td-import-java.git +https://github.com/Atmosphere/atmosphere-extensions.git +https://github.com/astefanutti/camel-cdi.git +https://github.com/imagej/imagej-ui-swing.git +https://github.com/komoot/photon.git +https://github.com/FIWARE-Middleware/KIARA.git +https://github.com/mbosecke/pebble.git +https://github.com/metamx/java-util.git +https://github.com/contextproject/discover.git +https://github.com/btrplace/scheduler.git +https://github.com/tuwiendsg/rSYBL.git +https://github.com/sker65/go-dmd-clock.git +https://github.com/liferay/liferay-maven-support.git +https://github.com/0xCopy/RelaxFactory.git +https://github.com/caskdata/hydrator-plugins.git +https://github.com/joelittlejohn/jsonschema2pojo.git +https://github.com/ArcBees/gwtquery.git +https://github.com/redpen-cc/redpen.git +https://github.com/sviperll/chicory.git +https://github.com/spotify/cassandra-reaper.git +https://github.com/sakai-mirror/calendar.git +https://github.com/notthebees/languagegames.git +https://github.com/imagej/ij1-patcher.git +https://github.com/cjdev/versions-maven-plugin.git +https://github.com/phax/as2-lib.git +https://github.com/RolecraftDev/RolecraftCore.git +https://github.com/dbs-leipzig/gradoop.git +https://github.com/dbsoftcombr/dbssdk.git +https://github.com/shabbies/clockwork.git +https://github.com/skillcoyne/IGCSA.git +https://github.com/GlobalTechnology/idm-user-management.git +https://github.com/instance01/MinigamesAPI.git +https://github.com/jaywarrick/JEX.git +https://github.com/mwcaisse/AndroidFT.git +https://github.com/figarocms/cucumber-jvm-fixtures.git +https://github.com/jillesvangurp/jsonj.git +https://github.com/SonarSource/sslr.git +https://github.com/OpenTreeOfLife/taxomachine.git +https://github.com/The-Dream-Team/Tardis.git +https://github.com/digipost/digipost-api-client-java.git +https://github.com/nuxeo/nuxeo-platform-video.git +https://github.com/INL/BlackLab.git +https://github.com/andrewgaul/s3proxy.git +https://github.com/OpenRock/OpenICF-java-framework.git +https://github.com/blurpy/kouchat-android.git +https://github.com/hypercube1024/firefly.git +https://github.com/Poweruser/MinetickMod.git +https://github.com/exoplatform/wiki.git +https://github.com/Bukkit/Bukkit.git +https://github.com/zanata/zanata-api.git +https://github.com/stripe/stripe-java.git +https://github.com/allanbank/mongodb-async-driver.git +https://github.com/AMOSTeam3/amos-ss15-proj3.git +https://github.com/groovy/GMavenPlus.git +https://github.com/SUSE/saltstack-netapi-client-java.git +https://github.com/nuxeo/nuxeo-chemistry.git +https://github.com/wattdepot/wattdepot.git +https://github.com/nuxeo/nuxeo-platform-forms-layout-demo.git +https://github.com/deeplearning4j/nd4j.git +https://github.com/NFSdb/nfsdb.git +https://github.com/cismet/cids-server.git +https://github.com/sarahtattersall/PIPE.git +https://github.com/apache/maven-shared.git +https://github.com/daisy/pipeline-mod-braille.git +https://github.com/jenkinsci/xunit-plugin.git +https://github.com/jclouds/jclouds-karaf.git +https://github.com/DICE-UNC/jargon.git +https://github.com/ancoron/glassfish-main.git +https://github.com/FallenMoonNetwork/CanaryLib.git +https://github.com/phillipsic/SeleniumDriverFramework.git +https://github.com/dekellum/iudex.git +https://github.com/apache/openjpa.git +https://github.com/Praqma/cool.git +https://github.com/SciGraph/SciGraph.git +https://github.com/TeamShadow/shadow.git +https://github.com/cowtowncoder/ClusterMate.git +https://github.com/shubhcollaborator/common-csvnew.git +https://github.com/albfan/jmeld.git +https://github.com/strongbox/strongbox.git +https://github.com/dropwizard/metrics.git +https://github.com/Studentmediene/Momus.git +https://github.com/apache/continuum.git +https://github.com/jenkinsci/junit-plugin.git +https://github.com/pcpratts/rootbeer1.git +https://github.com/nieshr/xjr.git +https://github.com/LiveRamp/hank.git +https://github.com/ralscha/extdirectspring.git +https://github.com/sjanaud/jensoft-core.git +https://github.com/marschraner/svm.git +https://github.com/LEDS/sincap-entities.git +https://github.com/AugGroup/hr-db.git +https://github.com/MineSworn/UltimateArena.git +https://github.com/benas/easy-batch.git +https://github.com/ocpsoft/rewrite.git +https://github.com/square/dagger.git +https://github.com/pgjdbc/pgjdbc.git +https://github.com/membrane/service-proxy.git +https://github.com/getsentry/raven-java.git +https://github.com/jboss-javassist/javassist.git +https://github.com/mybatis/mybatis-3.git +https://github.com/opendaylight/groupbasedpolicy.git +https://github.com/Bammerbom/UltimateCore.git +https://github.com/mrniko/netty-socketio.git +https://github.com/searchbox-io/Jest.git +https://github.com/jo-pol/DiBL.git +https://github.com/sukirtigupta/slf4j.git +https://github.com/immutables/immutables.git +https://github.com/dumptruckman/PluginBase.git +https://github.com/TridentSDK/TridentSDK.git +https://github.com/johan-martenson/settlers.git +https://github.com/lightblue-platform/lightblue-core.git +https://github.com/cismet/cismet-gui-commons.git +https://github.com/dakusui/jcunit.git +https://github.com/Pi4J/pi4j.git +https://github.com/lutece-secteur-public/espacepublic-plugin-dansmarue.git +https://github.com/lexicalscope/svm.git +https://github.com/ucam-cl-dtg/urop-2013-questions.git +https://github.com/grzegorz2047/OpenGuild2047.git +https://github.com/davidsoergel/ml.git +https://github.com/apache/sirona.git +https://github.com/Stratio/cassandra-lucene-index.git +https://github.com/jdr0887/MaPSeq-Pipeline-NCGenes-DX.git +https://github.com/Azure/azure-sdk-for-java.git +https://github.com/sakai-mirror/msgcntr.git +https://github.com/openmrs/openmrs-module-emrapi.git +https://github.com/openmrs/openmrs-module-mdrtb.git +https://github.com/iZettle/izettle-toolbox.git +https://github.com/jmxtrans/jmxtrans.git +https://github.com/effektif/effektif.git +https://github.com/DataTorrent/Apex.git +https://github.com/aeshell/aesh.git +https://github.com/SophieKoonin/JavaDecaf.git +https://github.com/jitsi/jitsi-videobridge.git +https://github.com/renepickhardt/generalized-language-modeling-toolkit.git +https://github.com/TeamExodus/external_gson.git +https://github.com/minnal/minnal.git +https://github.com/soi-toolkit/soi-toolkit-mule.git +https://github.com/davidmoten/rtree.git +https://github.com/zendesk/maxwell.git +https://github.com/apache/commons-compress.git +https://github.com/hector-client/hector.git +https://github.com/tubav/fiteagle.git +https://github.com/AtlasOfLivingAustralia/bie-service.git +https://github.com/sakai-mirror/basiclti.git +https://github.com/nuodb/migration-tools.git +https://github.com/jahlborn/jackcess.git +https://github.com/SebastianNiemann/ArmadilloJava.git +https://github.com/RuedigerMoeller/kontraktor.git +https://github.com/carewebframework/carewebframework-core.git +https://github.com/BattleNight/BattleNight-Core.git +https://github.com/apache/directory-studio.git +https://github.com/apache/creadur-rat.git +https://github.com/ARUP-NGS/Pipeline.git +https://github.com/zenbones/SmallMind.git +https://github.com/jayway/rest-assured.git +https://github.com/datanucleus/datanucleus-rdbms.git +https://github.com/TridentSDK/Trident.git +https://github.com/wildfly-security/wildfly-elytron.git +https://github.com/mathieu-bellange/ourses-plumes.git +https://github.com/apache/vxquery.git +https://github.com/openmrs/openmrs-distro-referenceapplication.git +https://github.com/apache/httpclient.git +https://github.com/costamojan/xbean.git +https://github.com/masatomix/repo.git +https://github.com/lviggiano/owner.git +https://github.com/apache/juddi.git +https://github.com/seulkikims/hashtable.git +https://github.com/davidkey/supertunnel.git +https://github.com/irstv/H2GIS.git +https://github.com/mindwind/craft-atom.git +https://github.com/relayrides/pushy.git +https://github.com/jdereg/json-io.git +https://github.com/Welchd1/resolve-lite.git +https://github.com/jmacglashan/burlap.git +https://github.com/fluxroot/hadaps.git +https://github.com/matthias-mueller/movingcode.git +https://github.com/aceleradora6-tw/RegistroLivre.git +https://github.com/st-js/st-js.git +https://github.com/OpenHFT/HugeCollections.git +https://github.com/zycgit/hasor.git +https://github.com/sesuncedu/htmlparser.git +https://github.com/lsds/SEEP.git +https://github.com/stephenostermiller/ostermillerutils.git +https://github.com/FamilySearch/gedcomx-java.git +https://github.com/apache/ddlutils.git +https://github.com/optimizationBenchmarking/optimizationBenchmarking.git +https://github.com/SomMeri/less4j.git +https://github.com/xwiki/xwiki-rendering.git +https://github.com/worm1k/OSS_Cauliflower.git +https://github.com/sakai-mirror/portal.git +https://github.com/aht-group/ofx.git +https://github.com/apache/struts.git +https://github.com/stapler/stapler.git +https://github.com/cismet/cids-navigator.git +https://github.com/virgo47/javasimon.git +https://github.com/xwic/appkit.git +https://github.com/tehbeard/BeardStat.git +https://github.com/nuxeo/nuxeo-dam.git +https://github.com/DavidAlphaFox/jetlang-core.git +https://github.com/openspim/SPIMAcquisition.git +https://github.com/dbsoftcombr/dbsfaces.git +https://github.com/sualeh/DaylightChart.git +https://github.com/mkovatsc/Californium.git +https://github.com/meltmedia/cadmium.git +https://github.com/diirt/graphene.git +https://github.com/martiner/gooddata-java.git +https://github.com/jdr0887/MaPSeq-Pipeline-CASAVA.git +https://github.com/egonw/cdk.git +https://github.com/AtlasOfLivingAustralia/biocache-service.git +https://github.com/ChiralBehaviors/Ultrastructure.git +https://github.com/lobid/lodmill.git +https://github.com/paul-hammant/qdox.git +https://github.com/SonarCommunity/sonar-javascript.git +https://github.com/jim618/multibit.git +https://github.com/dropwizard/dropwizard.git +https://github.com/tranzero/SoftServe_Java-105_Project.git +https://github.com/tinkerpop/blueprints.git +https://github.com/webbukkit/dynmap.git +https://github.com/bitcoinj/bitcoinj.git +https://github.com/AxonFramework/AxonFramework.git +https://github.com/hawkular/hawkular-metrics.git +https://github.com/picketlink/picketlink.git +https://github.com/opendaylight/bgpcep.git +https://github.com/rage/tmc-cli.git +https://github.com/openfigis/vme.git +https://github.com/apache/commons-codec.git +https://github.com/square/wire.git +https://github.com/Wikidata/Wikidata-Toolkit.git +https://github.com/itm/testbed-runtime.git +https://github.com/tomp2p/TomP2P.git +https://github.com/j256/ormlite-core.git +https://github.com/nishihatapalmer/byteseek.git +https://github.com/rapla/rapla.git +https://github.com/Bombe/Sone.git +https://github.com/yusuke/twitter4j.git +https://github.com/jhpoelen/eol-globi-data.git +https://github.com/google/error-prone.git +https://github.com/basho/riak-java-client.git +https://github.com/jamesagnew/hapi-fhir.git +https://github.com/brooklyncentral/clocker.git +https://github.com/spring-projects/spring-data-commons.git +https://github.com/SonarCommunity/sonar-php.git +https://github.com/imglib/imglib2.git +https://github.com/h2oai/h2o.git +https://github.com/lucmoreau/ProvToolbox.git +https://github.com/threerings/tripleplay.git +https://github.com/chapmajs/shiro.git +https://github.com/zalando-stups/fullstop.git +https://github.com/airlift/airlift.git +https://github.com/dcm4che/dcm4che.git +https://github.com/myui/hivemall.git +https://github.com/cismet/cismap-commons.git +https://github.com/suggitpe/java-sandbox.git +https://github.com/zanata/zanata-client.git +https://github.com/dalderliesten/Scrumbledore.git +https://github.com/cojen/Tupl.git +https://github.com/pendelhaven3/magic.git +https://github.com/jenkinsci/acceptance-test-harness.git +https://github.com/gbif/occurrence.git +https://github.com/OpenHFT/Java-Lang.git +https://github.com/doanduyhai/Achilles.git +https://github.com/shilad/wikibrain.git +https://github.com/xdoo/vaadin-demo.git +https://github.com/nuxeo/nuxeo-platform-document-routing.git +https://github.com/jitlogic/zorka.git +https://github.com/UniTime/unitime.git +https://github.com/anba/es6draft.git +https://github.com/yogendra12/Rescuefy.git +https://github.com/bcdev/ceres.git +https://github.com/OpenTreeOfLife/treemachine.git +https://github.com/Carboni/zebedee.git +https://github.com/Gigaspaces/mongo-datasource.git +https://github.com/mebigfatguy/fb-contrib.git +https://github.com/google/guava.git +https://github.com/mleduque/ide.git +https://github.com/finmath/finmath-lib.git +https://github.com/sbwhitecap/Phex-trunk.git +https://github.com/named-data/jndn.git +https://github.com/thinkaurelius/titan.git +https://github.com/apache/accumulo.git +https://github.com/Activiti/Activiti.git +https://github.com/apache/pdfbox.git +https://github.com/apache/servicemix4-bundles.git +https://github.com/deeplearning4j/deeplearning4j.git +https://github.com/checkstyle/checkstyle.git +https://github.com/IntroPV/vainilla.git +https://github.com/korpling/ANNIS.git +https://github.com/undertow-io/undertow.git +https://github.com/arjovanramshorst/bubble-bobble-sem.git +https://github.com/bcdev/coastcolour.git +https://github.com/nuxeo/nuxeo-platform-semantic-entities.git +https://github.com/protegeproject/swrlapi.git +https://github.com/jenkinsci/envinject-plugin.git +https://github.com/jensnerche/plantuml.git +https://github.com/teanutella/AppEmployee.git +https://github.com/SonarSource/sonar-python.git +https://github.com/Atmosphere/nettosphere.git +https://github.com/apache/maven-release.git +https://github.com/svn2github/commons-vfs2.git +https://github.com/guibin/Knowledge.git +https://github.com/kmbulebu/NickNack.git +https://github.com/alecgorge/jsonapi.git +https://github.com/aaron-santos/lanterna.git +https://github.com/Appendium/flatpack.git +https://github.com/google/closure-templates.git +https://github.com/XBigTK13X/Aigilas.git +https://github.com/greese/dasein-cloud-openstack.git +https://github.com/Graphity/graphity-client.git +https://github.com/mung3r/ecoCreature.git +https://github.com/harrifeng/java-in-action.git +https://github.com/no-hope/java-toolkit.git +https://github.com/tinman89/tinspx-utils.git +https://github.com/spring-cloud/spring-cloud-aws.git +https://github.com/DigitalPebble/storm-crawler.git +https://github.com/bitbar/testdroid-api.git +https://github.com/takari/takari-lifecycle.git +https://github.com/EHRI/ehri-rest.git +https://github.com/grisu/gricli.git +https://github.com/FasterXML/jackson-dataformat-xml.git +https://github.com/photon-infotech/commons.git +https://github.com/ClemsonRSRG/RESOLVE.git +https://github.com/Ensembl/ensj-healthcheck.git +https://github.com/haraldk/TwelveMonkeys.git +https://github.com/lkroesen/BubbleBobble.git +https://github.com/sugarcrm/candybean.git +https://github.com/stackmob/stackmob-java-client-sdk.git +https://github.com/undera/jmeter-plugins.git +https://github.com/ldbc/ldbc_driver.git +https://github.com/mojohaus/nbm-maven-plugin.git +https://github.com/openpnp/openpnp.git +https://github.com/paulhoule/infovore.git +https://github.com/simoc/csvjdbc.git +https://github.com/vkostyukov/la4j.git +https://github.com/valis/vclang.git +https://github.com/objectify/objectify.git +https://github.com/apache/maven-surefire.git +https://github.com/bpiwowar/experimaestro.git +https://github.com/visiriCEP/VISIRI.git +https://github.com/Incoding/apiTest.git +https://github.com/revapi/revapi.git +https://github.com/jdr0887/MaPSeq-Pipeline-RNASeq.git +https://github.com/jdr0887/MaPSeq-Pipeline-NCGenes-DepthOfCoverage.git +https://github.com/StopBadware/dsp-core.git +https://github.com/DerPavlov/Cannons.git +https://github.com/thinkofdeath/ThinkMap.git +https://github.com/nuxeo/nuxeo-diff.git +https://github.com/apache/mina-sshd.git +https://github.com/buddycloud/buddycloud-server-java.git +https://github.com/jenkinsci/remoting.git +https://github.com/PIH/mirebalais-smoke-tests.git +https://github.com/antoine-tran/Hedera.git +https://github.com/carrotsearch/randomizedtesting.git +https://github.com/keeps/db-preservation-toolkit.git +https://github.com/jbossas/jboss-ejb-client.git +https://github.com/hortonworks/knox.git +https://github.com/jbossws/jbossws-spi.git +https://github.com/VincSch/Photoplatform_Angular_Spring.git +https://github.com/eileenzheng/playground.git +https://github.com/selig/qea.git +https://github.com/apache/commons-bcel.git +https://github.com/apache/directory-shared.git +https://github.com/triceo/splitlog.git +https://github.com/ovgu-ccd/jchess.git +https://github.com/jsr107/jsr107tck.git +https://github.com/jenkinsci/matrix-project-plugin.git +https://github.com/datasalt/pangool.git +https://github.com/exoplatform/integration.git +https://github.com/kovertopz/Framework-GL.git +https://github.com/desht/ScrollingMenuSign.git +https://github.com/FluentLenium/FluentLenium.git +https://github.com/apache/maven-scm.git +https://github.com/KnisterPeter/Smaller.git +https://github.com/oboformat/oboformat.git +https://github.com/FoxDev/FoxBot.git +https://github.com/sonatype/nexus-maven-plugins.git +https://github.com/bridgedb/BridgeDb.git +https://github.com/locationtech/spatial4j.git +https://github.com/jenkinsci/sauce-ondemand-plugin.git +https://github.com/kohsuke/github-api.git +https://github.com/niyue/coding.git +https://github.com/roundrop/facebook4j.git +https://github.com/sk89q/CommandBook.git +https://github.com/cylong1016/NJULily.git +https://github.com/vivantech/kc_fixes.git +https://github.com/Weltraumschaf/JUberblog.git +https://github.com/marytts/marytts.git +https://github.com/richardwilly98/elasticsearch-river-mongodb.git +https://github.com/andsel/moquette.git +https://github.com/diorcety/maven-dependency-plugin.git +https://github.com/gbif/checklistbank.git +https://github.com/huangyingw/blablacode.git +https://github.com/reinert/requestor.git +https://github.com/tntim96/JSCover.git +https://github.com/xenomorpheus/heisenberg.git +https://github.com/zyong2004/mybatis-spring.git +https://github.com/roberth/pitest.git +https://github.com/JensBee/QueryClarity.git +https://github.com/Weltraumschaf/commons.git +https://github.com/greese/dasein-cloud-aws.git +https://github.com/bguerout/jongo.git +https://github.com/thomasjungblut/thomasjungblut-common.git +https://github.com/goalhub/runtime.git +https://github.com/apache/directory-server.git +https://github.com/clojure/clojure.git +https://github.com/wnameless/rubycollect4j.git +https://github.com/omnifaces/omnifaces.git +https://github.com/Razz0991/Minigames.git +https://github.com/apache/commons-fileupload.git +https://github.com/ChandraCXC/iris.git +https://github.com/zyyettie/LaaS.git +https://github.com/alibaba/druid.git +https://github.com/thothbot/parallax.git +https://github.com/msokolov/lux.git +https://github.com/jenkinsci/git-client-plugin.git +https://github.com/statsbiblioteket/newspaper-batch-event-framework.git +https://github.com/apache/curator.git +https://github.com/mikesname/ehri-rest.git +https://github.com/apache/chemistry-opencmis.git +https://github.com/Dandelion/dandelion.git +https://github.com/yuchaosydney/kouchat.git +https://github.com/xEssentials/xEssentials.git +https://github.com/couchbase/couchbase-java-client.git +https://github.com/sitewhere/sitewhere.git +https://github.com/scriptella/scriptella-etl.git +https://github.com/medusa-project/cantaloupe.git +https://github.com/charite/jannovar.git +https://github.com/code4craft/webmagic.git +https://github.com/tavlima/fosstrak-epcis.git +https://github.com/IMExConsortium/dip-proxy.git +https://github.com/junit-team/junit.git +https://github.com/twilio/twilio-java.git +https://github.com/sannies/mp4parser.git +https://github.com/xerial/sqlite-jdbc.git +https://github.com/seges/sesam.git +https://github.com/moravianlibrary/RecordManager2.git +https://github.com/Omertron/api-themoviedb.git +https://github.com/KittehOrg/KittehIRCClientLib.git +https://github.com/taverna/taverna-engine-core.git +https://github.com/dankurka/mgwt.git +https://github.com/FITeagle/adapters.git +https://github.com/drnoa/schemaspy.git +https://github.com/carrotsearch/hppc.git +https://github.com/neo4j/neo4j-ogm.git +https://github.com/calrissian/accumulo-recipes.git +https://github.com/apache/maven.git +https://github.com/GluuFederation/oxTrust.git +https://github.com/sanity/quickml.git +https://github.com/davidmoten/rxjava-extras.git +https://github.com/elex-bigdata/lldaMahout.git +https://github.com/tengstrand/Laja.git +https://github.com/spring-cloud/spring-cloud-config.git +https://github.com/trajano/doxdb.git +https://github.com/yegor256/s3auth.git +https://github.com/greese/dasein-cloud-core.git +https://github.com/jdr0887/MaPSeq-Pipeline-NCGenes.git +https://github.com/sesuncedu/elk-reasoner.git +https://github.com/ColoradoSchoolOfMines/interface_sdk.git +https://github.com/rhuss/docker-maven-plugin.git +https://github.com/k-hotta/SCAnalyzer.git +https://github.com/tastybento/askyblock.git +https://github.com/osiam/connector4java.git +https://github.com/juzu/juzu.git +https://github.com/OpenHFT/Chronicle-Queue.git +https://github.com/junkdog/artemis-odb.git +https://github.com/apache/archiva.git +https://github.com/ESAPI/esapi-java-legacy.git +https://github.com/motech/modules.git +https://github.com/Benoker/DockingFrames.git +https://github.com/barchart/barchart-feed.git +https://github.com/swagger-api/swagger-core.git +https://github.com/droolsjbpm/droolsjbpm-knowledge.git +https://github.com/soul2zimate/resteasy2.git +https://github.com/konsoletyper/teavm.git +https://github.com/veraPDF/veraPDF-library.git +https://github.com/VictorBac/LO23.git +https://github.com/europeana/corelib.git +https://github.com/GoogleCloudPlatform/gcloud-java.git +https://github.com/mojohaus/jaxws-maven-plugin.git +https://github.com/threerings/clyde.git +https://github.com/maxymania/jxta-jxse.git +https://github.com/xetorthio/jedis.git +https://github.com/thervh70/SEM_Team9.git +https://github.com/PerfCake/PerfCake.git +https://github.com/plutext/docx4j.git +https://github.com/slipcor/pvparena.git +https://github.com/barchart/barchart-feed-ddf.git +https://github.com/SpigotMC/BungeeCord.git +https://github.com/forcedotcom/phoenix.git +https://github.com/eFaps/eFaps-WebApp.git +https://github.com/AtlasOfLivingAustralia/spatial-portal.git +https://github.com/eugen-eugen/eugensjbehave.git +https://github.com/GenomicParisCentre/aozan.git +https://github.com/ikasanEIP/ikasan.git +https://github.com/WorldCretornica/PlotMe-Core.git +https://github.com/marklogic/java-client-api.git +https://github.com/apache/jackrabbit.git +https://github.com/tfredrich/docussandra.git +https://github.com/codeine-cd/codeine.git +https://github.com/Mobicents/sip-servlets.git +https://github.com/movsim/movsim.git +https://github.com/threerings/playn.git +https://github.com/jcabi/jcabi-github.git +https://github.com/apache/wss4j.git +https://github.com/galenframework/galen.git +https://github.com/marylinh/ESAPI_JAVA_ALL.git +https://github.com/wso2/carbon-mediation.git +https://github.com/SINTEF-9012/ThingML.git +https://github.com/OhmData/hbase-public.git +https://github.com/nodebox/nodebox.git +https://github.com/jayway/powermock.git +https://github.com/julianhyde/optiq.git +https://github.com/ImmobilienScout24/deadcode4j.git +https://github.com/rhuss/jolokia.git +https://github.com/taverna/taverna-ui-components.git +https://github.com/cloudera/cdk.git +https://github.com/qcadoo/qcadoo.git +https://github.com/ebean-orm/avaje-ebeanorm.git +https://github.com/simon-eastwood/DependencyCheckCM.git +https://github.com/spring-projects/spring-data-rest.git +https://github.com/hudec/sql-processor.git +https://github.com/aherbert/GDSC-SMLM.git +https://github.com/rovo89/public-transport-enabler-fork.git +https://github.com/Hidendra/LWC.git +https://github.com/twizmwazin/CardinalPGM.git +https://github.com/mrniko/redisson.git +https://github.com/bigdataops/bgpcep.git +https://github.com/GrizzlyNIO/grizzly-mirror.git +https://github.com/adangel/pmd.git +https://github.com/lrozenblyum/chess.git +https://github.com/nuxeo/nuxeo-drive-server.git +https://github.com/imagej/imagej-legacy.git +https://github.com/tyrus-project/tyrus.git +https://github.com/elBukkit/MagicLib.git +https://github.com/libetl/soundtransform.git +https://github.com/yegor256/takes.git +https://github.com/graphaware/neo4j-framework.git +https://github.com/sboesebeck/morphium.git +https://github.com/OpenRock/OpenIG.git +https://github.com/DeveloperLiberationFront/social-screencasting-core.git +https://github.com/geenen124/Team1BubbleTrouble.git +https://github.com/kaazing/gateway.git +https://github.com/sakai-mirror/sam.git +https://github.com/apache/commons-configuration.git +https://github.com/fiji/TrackMate3.git +https://github.com/yegor256/thindeck.git +https://github.com/Eluinhost/ultrahardcore.git +https://github.com/caprica/vlcj.git +https://github.com/threerings/depot.git +https://github.com/ysc/word.git +https://github.com/slowenthal/classproject.git +https://github.com/apache/directory-fortress-core.git +https://github.com/greese/dasein-cloud-google.git +https://github.com/insideo/randomcoder-website.git +https://github.com/lemire/RoaringBitmap.git +https://github.com/telefonicaid/fiware-cygnus.git +https://github.com/hk2-project/hk2.git +https://github.com/ninjaframework/ninja.git +https://github.com/Xephi/AuthMeReloaded.git +https://github.com/droolsjbpm/jbpm-form-modeler.git +https://github.com/gwtbootstrap/gwt-bootstrap.git +https://github.com/objectos/way.git +https://github.com/maxapryg/UITest.git +https://github.com/ghedlund/phon.git +https://github.com/aadnk/ProtocolLib.git +https://github.com/karamelchef/karamel.git +https://github.com/Hansschouten/context_health_informatics.git +https://github.com/apache/incubator-streams.git +https://github.com/kuujo/vertigo.git +https://github.com/vmi/selenese-runner-java.git +https://github.com/spotify/docker-client.git +https://github.com/JWebUnit/jwebunit.git +https://github.com/johncarl81/transfuse.git +https://github.com/sakai-mirror/kernel.git +https://github.com/CloudSlang/cloud-slang.git +https://github.com/datanucleus/datanucleus-core.git +https://github.com/eXistence/TeeTime.git +https://github.com/jitsi/libjitsi.git +https://github.com/FasterXML/jackson-core.git +https://github.com/mizdebsk/xmvn.git +https://github.com/ATLauncher/ATLauncher.git +https://github.com/maartentbm/ContextPL1.git +https://github.com/apache/incubator-ambari.git +https://github.com/spring-projects/spring-data-neo4j.git +https://github.com/apache/uima-uimaj.git +https://github.com/mongodb/morphia.git +https://github.com/minusone13/InvoicingSystem.git +https://github.com/jqno/equalsverifier.git +https://github.com/sdl/Testy.git +https://github.com/fcrepo4/fcrepo4.git +https://github.com/GenomicParisCentre/eoulsan.git +https://github.com/te-con/ehour.git +https://github.com/ansell/owlapi.git +https://github.com/opendaylight/yangtools.git +https://github.com/OpenRock/OpenDJ.git +https://github.com/JolantaWojcik/biojavaOwn.git +https://github.com/DirtyUnicorns/android_external_spongycastle.git +https://github.com/owlcs/owlapi.git +https://github.com/stormpath/stormpath-sdk-java.git +https://github.com/apache/clerezza.git +https://github.com/aht-group/utils.git +https://github.com/webanno/webanno.git +https://github.com/apache/mahout.git +https://github.com/hal/core.git +https://github.com/playernodie/weupnp.git +https://github.com/apache/servicemix-bundles.git +https://github.com/robovm/robovm.git +https://github.com/graphhopper/graphhopper.git +https://github.com/perfectsense/dari.git +https://github.com/svn2github/SAT4J.git +https://github.com/jeremylong/DependencyCheck.git +https://github.com/apache/qpid-proton.git +https://github.com/svn2github/forge.git +https://github.com/droolsjbpm/optaplanner.git +https://github.com/soluvas/soluvas-framework.git +https://github.com/droolsjbpm/droolsjbpm-integration.git +https://github.com/swagger-api/swagger-codegen.git +https://github.com/druid-io/druid.git +https://github.com/FasterXML/jackson-databind.git +https://github.com/biojava/biojava.git +https://github.com/telefonicaid/fiware-cosmos-ambari.git +https://github.com/kyoken74/gwt-angular.git +https://github.com/apache/commons-lang.git +https://github.com/scifio/scifio.git +https://github.com/alibaba/RocketMQ.git +https://github.com/kuujo/copycat.git +https://github.com/jankotek/MapDB.git +https://github.com/droolsjbpm/kie-wb-distributions.git +https://github.com/buschmais/jqassistant.git +https://github.com/PLOS/ambra.git +https://github.com/spring-cloud/spring-cloud-netflix.git +https://github.com/apache/commons-math.git +https://github.com/OpenHFT/Chronicle-Network.git +https://github.com/chocoteam/choco3.git +https://github.com/apache/flume.git +https://github.com/rinde/RinSim.git +https://github.com/kotcrab/VisEditor.git +https://github.com/void256/nifty-gui.git +https://github.com/apache/uima-ducc.git +https://github.com/windup/windup.git +https://github.com/heuermh/dishevelled.git +https://github.com/salyh/javamailspec.git +https://github.com/SpoutDev/Spout.git +https://github.com/antlr/antlr4.git +https://github.com/Flipkart/quartz.git +https://github.com/rasto/lcmc.git +https://github.com/mafagafogigante/dungeon.git +https://github.com/PerfGeeks/pinpoint.git +https://github.com/opendaylight/openflowplugin.git +https://github.com/EngineHub/CommandHelper.git +https://github.com/tananaev/traccar.git +https://github.com/BroadleafCommerce/BroadleafCommerce.git +https://github.com/apache/hadoop-common.git +https://github.com/spring-projects/spring-boot.git +https://github.com/bcdev/beam.git +https://github.com/weld/core.git +https://github.com/elki-project/elki.git +https://github.com/belaban/JGroups.git +https://github.com/molgenis/molgenis.git +https://github.com/lennartj/jaxb2-maven-plugin.git +https://github.com/bugcy013/opennms-tmp-tools.git +https://github.com/Talend/tcommon-studio-se.git +https://github.com/netty/netty.git +https://github.com/openmicroscopy/bioformats.git +https://github.com/droolsjbpm/guvnor.git +https://github.com/jetty-project/codehaus-jetty6.git +https://github.com/apache/jackrabbit-oak.git +https://github.com/deegree/deegree3.git +https://github.com/apache/sling.git +https://github.com/EriclLee/ActiveMQ-On-Azure.git +https://github.com/amplab/tachyon.git +https://github.com/neo4j/neo4j.git +https://github.com/darranl/directory-server.git +https://github.com/hazelcast/hazelcast.git +https://github.com/infinispan/infinispan.git +https://github.com/DSpace/DSpace.git +https://github.com/mulesoft/mule.git +https://github.com/imCodePartnerAB/imcms.git +https://github.com/backslash47/webstart-maven-plugin.git +https://github.com/apache/hbase.git +https://github.com/languagetool-org/languagetool.git +https://github.com/droolsjbpm/drools.git +https://github.com/SpigotMC/Spigot-API.git +https://github.com/irstv/orbisgis.git +https://github.com/jgheerardyn/yields.git +https://github.com/ning/killbill.git +https://github.com/jline/jline2.git +https://github.com/altran/Whydah-UserIdentityBackend.git +https://github.com/encog/encog-java-core.git +https://github.com/highsource/jaxb2-basics.git +https://github.com/msoute/vertx-deploy-tools.git +https://github.com/apache/portals-pluto.git +https://github.com/antlr/stringtemplate4.git +https://github.com/kongchen/swagger-maven-plugin.git +https://github.com/gertvv/addis.git +https://github.com/ProjectCCNx/ccnx.git +https://github.com/mulesoft/mule-common.git +https://github.com/cismet/cismap-plugin.git +https://github.com/greese/dasein-cloud-azure.git +https://github.com/Requinard/TeamTab.git +https://github.com/SonarSource/jenkins-sonar-plugin.git +https://github.com/fbacchella/jrds.git +https://github.com/cdi-spec/cdi-tck.git +https://github.com/gwtbootstrap3/gwtbootstrap3.git +https://github.com/apache/commons-dbcp.git +https://github.com/timmolter/XChart.git +https://github.com/aranega/testgen.git +https://github.com/mysticfall/pivot4j.git +https://github.com/datasift/datasift-java.git +https://github.com/viktor-z/fb2pdf.git +https://github.com/Cas-B/Group17-BubbleBobble.git +https://github.com/TheCoder4eu/BootsFaces-OSP.git +https://github.com/UnifiedViews/Plugin-DevEnv.git +https://github.com/jidesoft/jide-oss.git +https://github.com/codescape/bitvunit.git +https://github.com/grandwazir/BanHammer.git +https://github.com/wildfly/wildfly-arquillian.git +https://github.com/Eluinhost/pluginframework.git +https://github.com/mtedone/podam.git +https://github.com/Mobicents/jss7.git +https://github.com/basis-technology-corp/rosette-common-java-api.git +https://github.com/webbit/webbit.git +https://github.com/GWASpi/GWASpi.git +https://github.com/jenkinsci/warnings-plugin.git +https://github.com/google/auto.git +https://github.com/TechnicPack/LauncherV3.git +https://github.com/ArtificerRepo/artificer.git +https://github.com/Jahia/external-provider.git +https://github.com/ICGC-TCGA-PanCancer/SeqWare-CGP-SomaticCore.git +https://github.com/PEXPlugins/PermissionsEx.git \ No newline at end of file diff --git a/pom.xml b/pom.xml index f999f12..36579b2 100644 --- a/pom.xml +++ b/pom.xml @@ -18,7 +18,13 @@ - + + + org.javatuples + javatuples + 1.2 + + edu.lu.uni simple-utils diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoaderCluster.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoaderCluster.java index 3cbab7d..4282c0c 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoaderCluster.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoaderCluster.java @@ -8,6 +8,7 @@ import com.github.gumtreediff.tree.ITree; import com.github.gumtreediff.tree.TreeContext; import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; import edu.lu.uni.serval.utils.FileHelper; + import org.javatuples.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoaderCluster3.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoaderCluster3.java index 3ebca74..78ba333 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoaderCluster3.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoaderCluster3.java @@ -1,38 +1,42 @@ package edu.lu.uni.serval.FixPatternParser.violations; -import com.github.gumtreediff.actions.ActionGenerator; -import com.github.gumtreediff.actions.model.*; -import com.github.gumtreediff.matchers.Matcher; -import com.github.gumtreediff.matchers.Matchers; -import com.github.gumtreediff.tree.ITree; -import com.github.gumtreediff.tree.TreeContext; -import edu.lu.uni.serval.FixPattern.utils.ASTNodeMap; -import edu.lu.uni.serval.gumtree.GumTreeComparer; -import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; -import edu.lu.uni.serval.gumtree.regroup.HierarchicalRegrouper; -import edu.lu.uni.serval.utils.FileHelper; -import edu.lu.uni.serval.utils.ListSorter; -import org.apache.commons.lang3.StringUtils; -import org.javatuples.Pair; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import redis.clients.jedis.*; - -import java.io.*; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.time.Duration; -import java.util.*; -import java.util.concurrent.Executors; -import java.util.function.Consumer; -import java.util.regex.Pattern; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.commons.text.similarity.*; - import static edu.lu.uni.serval.FixPatternParser.violations.MultiThreadTreeLoader.getKeysByValue; import static edu.lu.uni.serval.FixPatternParser.violations.MultiThreadTreeLoaderCluster.fromString; +import java.io.BufferedOutputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.similarity.JaroWinklerDistance; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.github.gumtreediff.tree.ITree; +import com.github.gumtreediff.tree.TreeContext; + +import edu.lu.uni.serval.FixPattern.utils.ASTNodeMap; +import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; +import edu.lu.uni.serval.utils.FileHelper; +import redis.clients.jedis.Jedis; +import redis.clients.jedis.JedisPool; +import redis.clients.jedis.JedisPoolConfig; +import redis.clients.jedis.ScanParams; +import redis.clients.jedis.ScanResult; + /** * Created by anilkoyuncu on 19/03/2018. */ diff --git a/src/main/java/edu/lu/uni/serval/Launcher.java b/src/main/java/edu/lu/uni/serval/Launcher.java index c67cc19..35ac556 100644 --- a/src/main/java/edu/lu/uni/serval/Launcher.java +++ b/src/main/java/edu/lu/uni/serval/Launcher.java @@ -24,7 +24,7 @@ public class Launcher { Properties appProps = new Properties(); - String appConfigPath = args[0]; + String appConfigPath = "/Users/kui.liu/Downloads/app.properties";//args[0]; // String appConfigPath = "/Users/anilkoyuncu/bugStudy/release/code/app.properties"; appProps.load(new FileInputStream(appConfigPath)); diff --git a/src/main/java/edu/lu/uni/serval/PatternExtractor.java b/src/main/java/edu/lu/uni/serval/PatternExtractor.java index f5e2459..8b04ba8 100644 --- a/src/main/java/edu/lu/uni/serval/PatternExtractor.java +++ b/src/main/java/edu/lu/uni/serval/PatternExtractor.java @@ -12,6 +12,8 @@ import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; +import edu.lu.uni.serval.FixPattern.utils.Checker; +import edu.lu.uni.serval.FixPatternParser.violations.MultiThreadTreeLoader; import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -175,15 +177,15 @@ public class PatternExtractor { } public static void getPattern(List fixes,String operation){ - String clusterPath = "/Users/anilkoyuncu/bugStudy/release/dataset/output/clusterallDatasetUPD/"; - String savePath = "/Users/anilkoyuncu/bugStudy/release/dataset/dumps/"; + String clusterPath = "/Users/kui.liu/Downloads/clusterallDatasetUPD/"; + String savePath = "/Users/kui.liu/Downloads/dumps/"; File patternsF = new File(clusterPath); File[] listOfPatterns = patternsF.listFiles(); Stream patterns = Arrays.stream(listOfPatterns); List patternsL = patterns .filter(x -> !x.getName().startsWith(".")) -// .filter(x-> x.getName().endsWith(".git")) +// .filter(x -> !x.getName().endsWith(".git")) .collect(Collectors.toList()); for (File pattern:patternsL) { @@ -192,7 +194,7 @@ public class PatternExtractor { Stream stream = Arrays.stream(listOfFiles); List patches = stream .filter(x -> !x.getName().startsWith(".")) -// .filter(x -> x.getName().endsWith(".git")) + .filter(x -> !x.getName().endsWith(".git")) .collect(Collectors.toList()); for (File patch : patches) { @@ -208,13 +210,26 @@ public class PatternExtractor { String content = new String(Files.readAllBytes(Paths.get(savePath + saveFN))); HierarchicalActionSet actionSet = (HierarchicalActionSet) fromString(content); + int astType = actionSet.getNode().getType(); + if (Checker.isStatement(astType) || astType == 23 //FieldDeclaration + || astType == 31 //MethodDeclaration + || astType == 55) {//TypeDeclaration + System.out.println(actionSet); +// ITree actionTree = MultiThreadTreeLoader.getActionTree(actionSet, null, null); +// ITree simpliedTree = getSimpliedTree(actionSet); +// System.out.println(new TreeToString().toString(simpliedTree)); +// System.out.println(new TreeToString().toString(actionTree)); + System.out.println("======"); + } + - ITree simpliedTree = getSimpliedTree(actionSet); - simpliedTree.toString(); +// ITree simpliedTree = getSimpliedTree(actionSet); +// simpliedTree.toString(); }catch (Exception e){ e.printStackTrace(); } } + System.out.println("============"); } } diff --git a/src/main/java/edu/lu/uni/serval/TreeToString.java b/src/main/java/edu/lu/uni/serval/TreeToString.java new file mode 100644 index 0000000..e234b69 --- /dev/null +++ b/src/main/java/edu/lu/uni/serval/TreeToString.java @@ -0,0 +1,47 @@ +package edu.lu.uni.serval; + +import java.util.ArrayList; +import java.util.List; + +import com.github.gumtreediff.tree.ITree; + +import edu.lu.uni.serval.FixPattern.utils.ASTNodeMap; + +public class TreeToString { + + private List strList = new ArrayList<>(); + + public String toString(ITree tree) { + List children = tree.getChildren(); + String str = ASTNodeMap.map.get(tree.getType()); + if (strList.size() == 0) { + strList.add(str); + for (ITree child : children) { + TreeToString t2s = new TreeToString(); + t2s.toString(child); + List strList1 = t2s.strList; + for (String str1 : strList1) { + strList.add("---" + str1); + } + } + } else { + strList.clear(); + strList.add(str); + for (ITree child : children) { + TreeToString t2s = new TreeToString(); + t2s.toString(child); + List strList1 = t2s.strList; + for (String str1 : strList1) { + strList.add("---" + str1); + } + } + } + + str = ""; + for (String str1 : strList) { + str += str1 + "\n"; + } + + return str; + } +} diff --git a/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalRegrouperForC.java b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalRegrouperForC.java index d98ef8f..23df400 100644 --- a/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalRegrouperForC.java +++ b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalRegrouperForC.java @@ -1,15 +1,19 @@ package edu.lu.uni.serval.gumtree.regroup; -import com.github.gumtreediff.actions.model.*; -import com.github.gumtreediff.tree.ITree; -import edu.lu.uni.serval.FixPattern.utils.CNodeMap; -import edu.lu.uni.serval.gumtree.GumTreeComparer; -import edu.lu.uni.serval.utils.ListSorter; - -import java.io.File; import java.util.ArrayList; import java.util.List; +import com.github.gumtreediff.actions.model.Action; +import com.github.gumtreediff.actions.model.Addition; +import com.github.gumtreediff.actions.model.Delete; +import com.github.gumtreediff.actions.model.Insert; +import com.github.gumtreediff.actions.model.Move; +import com.github.gumtreediff.actions.model.Update; +import com.github.gumtreediff.tree.ITree; + +import edu.lu.uni.serval.FixPattern.utils.CNodeMap; +import edu.lu.uni.serval.utils.ListSorter; + /** * Regroup GumTree results to a hierarchical construction. * @@ -18,14 +22,14 @@ import java.util.List; */ public class HierarchicalRegrouperForC { - public static void main(String[] args) { - GumTreeComparer com = new GumTreeComparer(); - File cFile1 = new File("/Users/anilkoyuncu/bugStudy/dataset/GumTreeInput/linux-stable/prevFiles/prev_0a3d00_b404bc_drivers#pci#iov.c"); - File cFile2 = new File("/Users/anilkoyuncu/bugStudy/dataset/GumTreeInput/linux-stable/revFiles/0a3d00_b404bc_drivers#pci#iov.c"); - List action = com.compareTwoFilesWithGumTreeForCCode(cFile1, cFile2); - List actionSet = new HierarchicalRegrouperForC().regroupGumTreeResults(action); - System.out.println(actionSet); - } +// public static void main(String[] args) { +// GumTreeComparer com = new GumTreeComparer(); +// File cFile1 = new File("/Users/anilkoyuncu/bugStudy/dataset/GumTreeInput/linux-stable/prevFiles/prev_0a3d00_b404bc_drivers#pci#iov.c"); +// File cFile2 = new File("/Users/anilkoyuncu/bugStudy/dataset/GumTreeInput/linux-stable/revFiles/0a3d00_b404bc_drivers#pci#iov.c"); +// List action = com.compareTwoFilesWithGumTreeForCCode(cFile1, cFile2); +// List actionSet = new HierarchicalRegrouperForC().regroupGumTreeResults(action); +// System.out.println(actionSet); +// } List actionSets = new ArrayList<>();