From c626b3981f7e681ece77bb548ecf5a0907bff71d Mon Sep 17 00:00:00 2001 From: anil Date: Sat, 8 Aug 2020 10:18:39 +0200 Subject: [PATCH] changes --- .../gen/srcml/AbstractSrcmlTreeGenerator.java | 1 + .../gumtreediff/gen/srcml/NodeMap_new.java | 1 + python/data/datasets.csv | 328 +++++++++++ python/getIntroClass.py | 17 +- python/main.py | 514 +++++++++++++++++- python/otherDatasets.py | 35 +- python/patchManyBugs.py | 6 +- python/patch_validate.py | 3 +- python/sprinferIndex.py | 40 +- python/stats.py | 12 +- python/test_patched_file.py | 3 +- python/validate_manybugs.py | 4 +- 12 files changed, 920 insertions(+), 44 deletions(-) diff --git a/gumtree/gen.srcml/src/main/java/com/github/gumtreediff/gen/srcml/AbstractSrcmlTreeGenerator.java b/gumtree/gen.srcml/src/main/java/com/github/gumtreediff/gen/srcml/AbstractSrcmlTreeGenerator.java index 0d573fa..52fe4d5 100644 --- a/gumtree/gen.srcml/src/main/java/com/github/gumtreediff/gen/srcml/AbstractSrcmlTreeGenerator.java +++ b/gumtree/gen.srcml/src/main/java/com/github/gumtreediff/gen/srcml/AbstractSrcmlTreeGenerator.java @@ -177,6 +177,7 @@ public abstract class AbstractSrcmlTreeGenerator extends TreeGenerator { private void fixPos(TreeContext ctx) { for (ITree t : ctx.getRoot().postOrder()) { if (!t.isLeaf()) { + if(t.getType() == 0) continue; //put the keywords as labels // if(t.getType() == 34 || t.getType() ==37 || t.getType() ==38 || t.getType()==39 || t.getType() == 41 || t.getType()==45 || t.getType() ==55 || t.getType()==14){ // t.setLabel(NodeMap_new.map.get(t.getType())+" " +t.getLabel()); diff --git a/gumtree/gen.srcml/src/main/java/com/github/gumtreediff/gen/srcml/NodeMap_new.java b/gumtree/gen.srcml/src/main/java/com/github/gumtreediff/gen/srcml/NodeMap_new.java index 25162db..cc66456 100644 --- a/gumtree/gen.srcml/src/main/java/com/github/gumtreediff/gen/srcml/NodeMap_new.java +++ b/gumtree/gen.srcml/src/main/java/com/github/gumtreediff/gen/srcml/NodeMap_new.java @@ -200,6 +200,7 @@ public class NodeMap_new { map.put( 197 , "message"); map.put( 199 , "protocol_list"); map.put( 200 , "category"); + map.put( 201 , "clause"); // // map.put(1 , "unit"); diff --git a/python/data/datasets.csv b/python/data/datasets.csv index e57d44c..70f0248 100644 --- a/python/data/datasets.csv +++ b/python/data/datasets.csv @@ -26,3 +26,331 @@ linux, git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git gmp,https://github.com/ryepdx/gmp lighttpd1.4,https://github.com/lighttpd/lighttpd1.4.git lighttpd2,https://github.com/lighttpd/lighttpd2.git +xqemu,https://github.com/xqemu/xqemu +xqemu,https://github.com/xqemu/xqemu +git,https://github.com/git/git +gpdb,https://github.com/greenplum-db/gpdb +MonetDBLite-C,https://github.com/MonetDB/MonetDBLite-C +panda,https://github.com/panda-re/panda +freeradius-server,https://github.com/FreeRADIUS/freeradius-server +bind9,https://github.com/isc-projects/bind9 +kamailio,https://github.com/kamailio/kamailio +ompi,https://github.com/open-mpi/ompi +prrte,https://github.com/openpmix/prrte +openssl,https://github.com/openssl/openssl +NetworkManager,https://github.com/NetworkManager/NetworkManager +freeciv,https://github.com/freeciv/freeciv +pcp,https://github.com/performancecopilot/pcp +gnutls,https://github.com/gnutls/gnutls +afni,https://github.com/afni/afni +fontforge,https://github.com/fontforge/fontforge +valgrind,https://github.com/pmem/valgrind +ImageMagick6,https://github.com/ImageMagick/ImageMagick6 +rsyslog,https://github.com/rsyslog/rsyslog +gpac,https://github.com/gpac/gpac +PackageKit,https://github.com/hughsie/PackageKit +libdxf,https://github.com/bert/libdxf +contiki,https://github.com/contiki-os/contiki +syslog-ng,https://github.com/syslog-ng/syslog-ng +collectd,https://github.com/collectd/collectd +atheme,https://github.com/atheme/atheme +open-watcom-v2,https://github.com/open-watcom/open-watcom-v2 +tvheadend,https://github.com/tvheadend/tvheadend +eudev,https://github.com/gentoo/eudev +SRB2,https://github.com/STJr/SRB2 +hyperion,https://github.com/SDL-Hercules-390/hyperion +riscv-openocd,https://github.com/riscv/riscv-openocd +lxc,https://github.com/lxc/lxc +deadbeef,https://github.com/DeaDBeeF-Player/deadbeef +openocd,https://github.com/ilg-archived/openocd +tmux,https://github.com/ThomasAdam/tmux +strace,https://github.com/strace/strace +ack,https://github.com/davidgiven/ack +i3,https://github.com/Airblader/i3 +zstd,https://github.com/facebook/zstd +libyang,https://github.com/CESNET/libyang +mps,https://github.com/Ravenbrook/mps +acados,https://github.com/acados/acados +i3,https://github.com/i3/i3 +hashcat,https://github.com/hashcat/hashcat +freetds,https://github.com/FreeTDS/freetds +SLASHEM-Extended,https://github.com/SLASHEM-Extended/SLASHEM-Extended +xmake,https://github.com/xmake-io/xmake +indigo,https://github.com/indigo-astronomy/indigo +libpostal,https://github.com/openvenues/libpostal +openwsn-fw,https://github.com/openwsn-berkeley/openwsn-fw +skiboot,https://github.com/open-power/skiboot +civetweb,https://github.com/civetweb/civetweb +ga,https://github.com/GlobalArrays/ga +harvey,https://github.com/Harvey-OS/harvey +astrometry.net,https://github.com/dstndstn/astrometry.net +aranym,https://github.com/aranym/aranym +NyuziProcessor,https://github.com/jbush001/NyuziProcessor +Netatalk,https://github.com/Netatalk/Netatalk +libevent,https://github.com/libevent/libevent +PnetCDF,https://github.com/Parallel-NetCDF/PnetCDF +owfs,https://github.com/owfs/owfs +glfw,https://github.com/glfw/glfw +dateutils,https://github.com/hroptatyr/dateutils +alsa-lib,https://github.com/alsa-project/alsa-lib +notion,https://github.com/raboof/notion +mint-arena,https://github.com/zturtleman/mint-arena +citus,https://github.com/citusdata/citus +simh,https://github.com/simh/simh +file,https://github.com/file/file +aircrack-ng,https://github.com/aircrack-ng/aircrack-ng +nagios-plugins,https://github.com/nagios-plugins/nagios-plugins +librdkafka,https://github.com/edenhill/librdkafka +bitlbee,https://github.com/bitlbee/bitlbee +cdogs-sdl,https://github.com/cxong/cdogs-sdl +LCUI,https://github.com/lc-soft/LCUI +minisphere,https://github.com/fatcerberus/minisphere +monitoring-plugins,https://github.com/monitoring-plugins/monitoring-plugins +openrc,https://github.com/OpenRC/openrc +OpenDUNE,https://github.com/OpenDUNE/OpenDUNE +aerospike-client-c,https://github.com/aerospike/aerospike-client-c +libming,https://github.com/libming/libming +kvazaar,https://github.com/ultravideo/kvazaar +deltachat-core,https://github.com/deltachat/deltachat-core +ettercap,https://github.com/Ettercap/ettercap +wget2,https://github.com/rockdaboot/wget2 +geeqie,https://github.com/BestImageViewer/geeqie +libzip,https://github.com/nih-at/libzip +specfem2d,https://github.com/geodynamics/specfem2d +jailhouse,https://github.com/siemens/jailhouse +xash3d,https://github.com/FWGS/xash3d +oniguruma,https://github.com/kkos/oniguruma +openvpn,https://github.com/OpenVPN/openvpn +libopencm3,https://github.com/libopencm3/libopencm3 +atari800,https://github.com/atari800/atari800 +htslib,https://github.com/samtools/htslib +luajit2,https://github.com/openresty/luajit2 +dnscrypt-proxy,https://github.com/dyne/dnscrypt-proxy +ccan,https://github.com/rustyrussell/ccan +rumprun,https://github.com/rumpkernel/rumprun +testdisk,https://github.com/cgsecurity/testdisk +ccextractor,https://github.com/CCExtractor/ccextractor +hackrf,https://github.com/mossmann/hackrf +flex,https://github.com/westes/flex +libmdbx,https://github.com/erthink/libmdbx +PowerShell-DSC-for-Linux,https://github.com/Microsoft/PowerShell-DSC-for-Linux +lcdproc,https://github.com/lcdproc/lcdproc +cmus,https://github.com/cmus/cmus +libstoragemgmt,https://github.com/libstorage/libstoragemgmt +ish,https://github.com/ish-app/ish +celix,https://github.com/apache/celix +nfft,https://github.com/NFFT/nfft +xf86-input-wacom,https://github.com/linuxwacom/xf86-input-wacom +libcaca,https://github.com/cacalabs/libcaca +lldpd,https://github.com/vincentbernat/lldpd +speex,https://github.com/xiph/speex +Icecast-Server,https://github.com/xiph/Icecast-Server +mosquitto,https://github.com/eclipse/mosquitto +aravis,https://github.com/AravisProject/aravis +tcsh,https://github.com/tcsh-org/tcsh +pysam,https://github.com/pysam-developers/pysam +c-blosc2,https://github.com/Blosc/c-blosc2 +blis,https://github.com/flame/blis +libiio,https://github.com/analogdevicesinc/libiio +ngs,https://github.com/ngs-lang/ngs +rauc,https://github.com/rauc/rauc +minizip,https://github.com/nmoinvaz/minizip +tlf,https://github.com/Tlf/tlf +NumCosmo,https://github.com/NumCosmo/NumCosmo +pilight,https://github.com/pilight/pilight +pg_pathman,https://github.com/postgrespro/pg_pathman +libfuse,https://github.com/libfuse/libfuse +cgreen,https://github.com/cgreen-devs/cgreen +hawq,https://github.com/apache/hawq +check,https://github.com/libcheck/check +mpifileutils,https://github.com/hpc/mpifileutils +ck,https://github.com/concurrencykit/ck +openpbs,https://github.com/openpbs/openpbs +oftc-ircservices,https://github.com/oftc/oftc-ircservices +systemshock,https://github.com/Interrupt/systemshock +dynomite,https://github.com/Netflix/dynomite +bahamut,https://github.com/DALnet/bahamut +tinyproxy,https://github.com/tinyproxy/tinyproxy +crazyflie-firmware,https://github.com/bitcraze/crazyflie-firmware +librsync,https://github.com/librsync/librsync +gdnsd,https://github.com/gdnsd/gdnsd +Zenroom,https://github.com/dyne/Zenroom +yubico-piv-tool,https://github.com/Yubico/yubico-piv-tool +petitboot,https://github.com/open-power/petitboot +mercury,https://github.com/mercury-hpc/mercury +libvmi,https://github.com/libvmi/libvmi +Criterion,https://github.com/Snaipe/Criterion +libqb,https://github.com/ClusterLabs/libqb +paho.mqtt.c,https://github.com/eclipse/paho.mqtt.c +mupnp,https://github.com/cybergarage/mupnp +raft,https://github.com/canonical/raft +libdill,https://github.com/sustrik/libdill +pupnp,https://github.com/pupnp/pupnp +munge,https://github.com/dun/munge +nginx-vod-module,https://github.com/kaltura/nginx-vod-module +TIC-80,https://github.com/nesbox/TIC-80 +ubertooth,https://github.com/greatscottgadgets/ubertooth +controller,https://github.com/kiibohd/controller +tarsnap,https://github.com/Tarsnap/tarsnap +miniJVM,https://github.com/digitalgust/miniJVM +libmaxminddb,https://github.com/maxmind/libmaxminddb +packetgraph,https://github.com/outscale/packetgraph +zmap,https://github.com/zmap/zmap +tpm2-abrmd,https://github.com/tpm2-software/tpm2-abrmd +dps-for-iot,https://github.com/intel/dps-for-iot +libplacebo,https://github.com/haasn/libplacebo +parasail,https://github.com/jeffdaily/parasail +powerman,https://github.com/chaos/powerman +rmw,https://github.com/theimpossibleastronaut/rmw +mpb,https://github.com/NanoComp/mpb +mapcache,https://github.com/mapserver/mapcache +secp256k1,https://github.com/bitcoin-core/secp256k1 +app,https://github.com/Studio-Link/app +portable,https://github.com/libressl-portable/portable +openbsm,https://github.com/openbsm/openbsm +Picnic,https://github.com/IAIK/Picnic +Unity,https://github.com/ThrowTheSwitch/Unity +cyclonedds,https://github.com/eclipse-cyclonedds/cyclonedds +inadyn,https://github.com/troglobit/inadyn +input-wacom,https://github.com/linuxwacom/input-wacom +my_basic,https://github.com/paladin-t/my_basic +pure-ftpd,https://github.com/jedisct1/pure-ftpd +fakechroot,https://github.com/dex4er/fakechroot +liboqs,https://github.com/open-quantum-safe/liboqs +libatomic_ops,https://github.com/ivmai/libatomic_ops +netcode,https://github.com/networkprotocol/netcode +fwup,https://github.com/fhunleth/fwup +otfcc,https://github.com/caryll/otfcc +mcu,https://github.com/digitalbitbox/mcu +cglm,https://github.com/recp/cglm +gphoto2,https://github.com/gphoto/gphoto2 +ksmbd-tools,https://github.com/namjaejeon/ksmbd-tools +unabto,https://github.com/nabto/unabto +SSLproxy,https://github.com/sonertari/SSLproxy +logrotate,https://github.com/logrotate/logrotate +trunk,https://github.com/idathena/trunk +duc,https://github.com/zevv/duc +parodus,https://github.com/xmidt-org/parodus +crust,https://github.com/crust-firmware/crust +liblognorm,https://github.com/rsyslog/liblognorm +axel,https://github.com/axel-download-accelerator/axel +MultiMarkdown-6,https://github.com/fletcher/MultiMarkdown-6 +dqlite,https://github.com/canonical/dqlite +snowball,https://github.com/snowballstem/snowball +libqrencode,https://github.com/fukuchi/libqrencode +pesign,https://github.com/rhboot/pesign +3CeAM,https://github.com/3CeAM/3CeAM +vnstat,https://github.com/vergoh/vnstat +scs,https://github.com/cvxgrp/scs +snoopy,https://github.com/a2o/snoopy +libtpms,https://github.com/stefanberger/libtpms +sniproxy,https://github.com/dlundquist/sniproxy +ngx_php7,https://github.com/rryqszq4/ngx_php7 +mod_perimeterx,https://github.com/PerimeterX/mod_perimeterx +luvi,https://github.com/luvit/luvi +Clight,https://github.com/FedeDP/Clight +imv,https://github.com/eXeC64/imv +UDUNITS-2,https://github.com/Unidata/UDUNITS-2 +redshift,https://github.com/jonls/redshift +glorytun,https://github.com/angt/glorytun +liblouisutdml,https://github.com/liblouis/liblouisutdml +openpace,https://github.com/frankmorgner/openpace +proftpd-mod_proxy,https://github.com/Castaglia/proftpd-mod_proxy +f-stack,https://github.com/F-Stack/f-stack +ior,https://github.com/hpc/ior +watchdogd,https://github.com/troglobit/watchdogd +gnome-inform7,https://github.com/ptomato/gnome-inform7 +sled,https://github.com/shinyblink/sled +Microduino-IDE-Support,https://github.com/wasdpkj/Microduino-IDE-Support +aws-c-common,https://github.com/awslabs/aws-c-common +noise-repellent,https://github.com/lucianodato/noise-repellent +Little-CMS,https://github.com/mm2/Little-CMS +Adafruit_nRF52_Bootloader,https://github.com/adafruit/Adafruit_nRF52_Bootloader +jasper,https://github.com/jasper-software/jasper +can-utils,https://github.com/linux-can/can-utils +masscan,https://github.com/robertdavidgraham/masscan +balde,https://github.com/balde/balde +libfreefare,https://github.com/nfc-tools/libfreefare +GNUSim8085,https://github.com/GNUSim8085/GNUSim8085 +pick,https://github.com/mptre/pick +ksm,https://github.com/asamy/ksm +libctl,https://github.com/NanoComp/libctl +libepoxy,https://github.com/anholt/libepoxy +libfort,https://github.com/seleznevae/libfort +cpuid,https://github.com/tycho/cpuid +x11vnc,https://github.com/LibVNC/x11vnc +dreamchess,https://github.com/dreamchess/dreamchess +umockdev,https://github.com/martinpitt/umockdev +nats.c,https://github.com/nats-io/nats.c +binjgb,https://github.com/binji/binjgb +mcfgthread,https://github.com/lhmouse/mcfgthread +bluez-alsa,https://github.com/Arkq/bluez-alsa +rtty,https://github.com/zhaojh329/rtty +ExternData,https://github.com/modelica-3rdparty/ExternData +c-tap-harness,https://github.com/rra/c-tap-harness +irqbalance,https://github.com/Irqbalance/irqbalance +OpenHMD,https://github.com/OpenHMD/OpenHMD +vmemcache,https://github.com/pmem/vmemcache +nxdk,https://github.com/XboxDev/nxdk +clr-boot-manager,https://github.com/clearlinux/clr-boot-manager +digest,https://github.com/eddelbuettel/digest +epk2extract,https://github.com/openlgtv/epk2extract +ENet-CSharp,https://github.com/nxrighthere/ENet-CSharp +ChameleonMini-rebooted,https://github.com/iceman1001/ChameleonMini-rebooted +mod-host,https://github.com/moddevices/mod-host +fzy,https://github.com/jhawthorn/fzy +sjs,https://github.com/saghul/sjs +opusfile,https://github.com/xiph/opusfile +libuev,https://github.com/troglobit/libuev +grub4dos,https://github.com/chenall/grub4dos +logswan,https://github.com/fcambus/logswan +gdk,https://github.com/Blockstream/gdk +sshfs,https://github.com/libfuse/sshfs +libtelnet,https://github.com/seanmiddleditch/libtelnet +biscuit,https://github.com/zhou-lab/biscuit +librtlsdr,https://github.com/steve-m/librtlsdr +vx32,https://github.com/0intro/vx32 +twolame,https://github.com/njh/twolame +cminpack,https://github.com/devernay/cminpack +sftpserver,https://github.com/ewxrjk/sftpserver +arduino-device-lib,https://github.com/TheThingsNetwork/arduino-device-lib +stream-lua-nginx-module,https://github.com/openresty/stream-lua-nginx-module +oppai-ng,https://github.com/Francesco149/oppai-ng +ccnet,https://github.com/haiwen/ccnet +lexbor,https://github.com/lexbor/lexbor +i3lock,https://github.com/i3/i3lock +par,https://github.com/prideout/par +Adafruit-GFX-Library,https://github.com/adafruit/Adafruit-GFX-Library +munin-c,https://github.com/munin-monitoring/munin-c +libevt,https://github.com/libyal/libevt +yabar,https://github.com/geommer/yabar +libvmod-redis,https://github.com/carlosabalde/libvmod-redis +FiSH-irssi,https://github.com/falsovsky/FiSH-irssi +bcal,https://github.com/jarun/bcal +esnacc-ng,https://github.com/esnacc/esnacc-ng +libhydrogen,https://github.com/jedisct1/libhydrogen +aws-c-io,https://github.com/awslabs/aws-c-io +varnish-modules,https://github.com/varnish/varnish-modules +rcppredis,https://github.com/eddelbuettel/rcppredis +cgltf,https://github.com/jkuhlmann/cgltf +kafkacat,https://github.com/edenhill/kafkacat +gdigi,https://github.com/desowin/gdigi +abcm2ps,https://github.com/leesavide/abcm2ps +proxytunnel,https://github.com/proxytunnel/proxytunnel +libphenom,https://github.com/facebookarchive/libphenom +evdi,https://github.com/DisplayLink/evdi +enca,https://github.com/nijel/enca +M7M01_MuEukaron,https://github.com/EDI-Systems/M7M01_MuEukaron +libva-utils,https://github.com/intel/libva-utils +Enterprise,https://github.com/SevenBits/Enterprise +rem,https://github.com/creytiv/rem +SDL_kitchensink,https://github.com/katajakasa/SDL_kitchensink +sntpd,https://github.com/troglobit/sntpd +Virtual-Assistant,https://github.com/ritwik12/Virtual-Assistant +port-mirroring,https://github.com/mmaraya/port-mirroring +telize,https://github.com/fcambus/telize +zhe,https://github.com/atolab/zhe +Tilengine,https://github.com/megamarc/Tilengine +Netopeer2,https://github.com/CESNET/Netopeer2 +libvmod-curl,https://github.com/varnish/libvmod-curl +ocaml-freestanding,https://github.com/mirage/ocaml-freestanding diff --git a/python/getIntroClass.py b/python/getIntroClass.py index c7a158b..d47358e 100644 --- a/python/getIntroClass.py +++ b/python/getIntroClass.py @@ -25,10 +25,21 @@ def export(): os.mkdir(join(BUGDIR)) # bugList = [i.replace(':', '-').replace('manybugs-', 'squareslab/manybugs:') for i in bugList] # exportCore(bugList[0]) + cmd = 'bugzoo bug list' + logging.info(cmd) + output, e = shellGitCheckout(cmd) bugList = [] - with open(introClassFile, 'r') as file: - for line in file.readlines(): - bugList.append(line.strip()) + for line in output.splitlines(): + if line == '': + continue + potentialId = line.split('|')[1].strip() + if potentialId.startswith('introclass'): + if 'Yes' in line: + bugList.append(potentialId) + + # with open(introClassFile, 'r') as file: + # for line in file.readlines(): + # bugList.append(line.strip()) print("bugList length: {}".format(len(bugList))) for b in bugList: exportCore(b) diff --git a/python/main.py b/python/main.py index 970018e..fddfed4 100644 --- a/python/main.py +++ b/python/main.py @@ -33,7 +33,7 @@ if __name__ == '__main__': # subject = 'ALL' # rootType = 'if' - + # job = 'patterns' print(job) @@ -99,21 +99,21 @@ if __name__ == '__main__': elif job =='indexClusters': from sprinferIndex import runSpinfer runSpinfer() - - from sprinferIndex import test - test() + # + # from sprinferIndex import test + # test() from sprinferIndex import divideCoccis divideCoccis() from sprinferIndex import removeDuplicates removeDuplicates() elif job == 'evalManyBugs': - from patchManyBugs import patchCore - patchCore() - # from patchManyBugs import patched - # patched() - from patchManyBugs import exportSosPatches - exportSosPatches() + # from patchManyBugs import patchCore + # patchCore() + ## from patchManyBugs import patched + ## patched() + # from patchManyBugs import exportSosPatches + # exportSosPatches() from validate_manybugs import validate validate() @@ -178,8 +178,502 @@ if __name__ == '__main__': elif job == 'patterns': from stats import exportAbstractPatterns exportAbstractPatterns() + elif job == 'travis': + + if isfile(join(DATA_PATH,'repoList')): + repoList = load_zipped_pickle(join(DATA_PATH,'repoList')) + else: + jobsPath = '/Users/anil.koyuncu/Downloads/jobs/' + files = listdir(jobsPath) + + def findRepos(file): + a = pd.read_json(join(jobsPath, file)) + repoList = a[a.config.apply(lambda x: x['language'] == 'c')].repository_slug.unique().tolist() + return repoList + repos = parallelRunMerge(findRepos,files) + repos + repoList = list(itertools.chain.from_iterable(repos)) + repoList = list(set(repoList)) + + save_zipped_pickle(repoList,join(DATA_PATH,'repoList')) + repoList + if not isfile(join(DATA_PATH,'repoDF')): + import requests + + results = [] + + start = 1 + prevDF = load_zipped_pickle( join(DATA_PATH, 'repoDF')) + prevMap = prevDF.set_index('repo').to_dict()['commitCount'] + repoDF = pd.DataFrame(columns=['repo', 'commitCount']) + + for idx, repo in enumerate(prevDF.repo.values.tolist()): + # commit count + + import time + + # print("Printed immediately.") + # time.sleep(0.1) + # resp = requests.get('https://github.com/'+repo) + # if resp.status_code != 200: + # continue + # repoDF.loc[idx] = [repo, '',False] + # else: + if repo in prevMap: + count = prevMap[repo] + if count == '': + resp = requests.get('https://github.com/' + repo) + print(repo) + repo = resp.url.split('https://github.com/')[-1] + print(repo) + cmd = 'curl -H "Authorization: token 39af4590fec0181ee47c17104d46aa179560b1d9" -I -k "https://api.github.com/repos/'+repo+'/commits?per_page=1"'+ " | sed -n '/^[Ll]ink:/ s/.*\"next\".*page=\([0-9]*\).*\"last\".*/\\1/p'" + o,e=shellGitCheckout(cmd) + print(e) + print(o) + count = o.strip() + repoDF.loc[idx] = [repo, count] + # import bs4 as bs + # + # from urllib.request import urlopen + # from urllib import error + # import urllib + # + # import socket + # + # timeout = 30 + # socket.setdefaulttimeout(timeout) + # + # import logging + # + # hdr = { + # 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11', + # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + # 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3', + # 'Accept-Encoding': 'none', + # 'Accept-Language': 'en-US,en;q=0.8', + # 'Connection': 'keep-alive'} + # + # req = urllib.request.Request('https://github.com/'+repo, headers=hdr) + # + # response = urlopen(req) + # the_page = response.read() + # + # soup = bs.BeautifulSoup(the_page, "html.parser") + # meta = soup.find('meta', {'name': 'octolytics-dimension-repository_is_fork'}) + # + # repoDF.loc[idx] = [repo,count,meta['content']] + + repoDF + + def getGitHubApi(r): + headers = {'Authorization': 'token %s' % '39af4590fec0181ee47c17104d46aa179560b1d9'} + resp = requests.get('https://api.github.com/repos/' + r, headers=headers) + if resp.status_code != 200: + print(resp.status_code) + return resp.json() + + repoDF['info']= repoDF.repo.apply(lambda x:getGitHubApi(x)) + else: + repoDF= load_zipped_pickle(join(DATA_PATH, 'repoDF')) + + repoDF['isFork'] = repoDF['info'].apply(lambda x: x['fork']) + nonForks = repoDF[repoDF['isFork'] == False] + nonForks['forks'] = nonForks['info'].apply(lambda x: x['forks']) + nonForks['open_issues'] = nonForks['info'].apply(lambda x: x['open_issues']) + nonForks['watchers'] = nonForks['info'].apply(lambda x: x['watchers']) + nonForks['subscribers_count'] = nonForks['info'].apply(lambda x: x['subscribers_count']) + + + + nonForks.commitCount = nonForks.commitCount.apply(lambda x:int(x) if x!='' else 0) + nonForks.sort_values(by='commitCount', ascending=False, inplace=True) + nonForks['language'] = nonForks['info'].apply(lambda x: x['language']) + nonForks[nonForks['language'] == 'C'] + # save_zipped_pickle(repoDF, join(DATA_PATH, 'repoDF')) + resp = requests.get( + 'https://api.travis-ci.org/repos/php/php-src/builds?after_number=' + str(start * 25)) + # if resp.status_code != 200: + # # This means something went wrong. + # raise Exception('GET /tasks/ {}'.format(resp.status_code)) + # results.append(resp.json()) + # start += 1 + + + + elif job == 'test': + if isfile(join(DATA_PATH, 'buildDFprocessed.pickle')): + res= load_zipped_pickle(join(DATA_PATH, 'buildDFprocessed.pickle')) + else: + if isfile(join(DATA_PATH, 'buildDF.pickle')): + buildDF = load_zipped_pickle(join(DATA_PATH, 'buildDF.pickle')) + else: + import requests + + results = [] + + start = 1 + while( start* 25 < 9250): + resp = requests.get('https://api.travis-ci.org/repos/kamailio/kamailio/builds?after_number='+str(start*25)) + if resp.status_code != 200: + # This means something went wrong. + raise Exception('GET /tasks/ {}'.format(resp.status_code)) + results.append(resp.json()) + start+=1 + + buildList = list(itertools.chain.from_iterable(results)) + buildDF = pd.DataFrame(buildList) + save_zipped_pickle(buildDF, join(DATA_PATH, 'buildDF.pickle')) + + master = buildDF[buildDF.branch == 'master'] + master = master[master.result == 1] + + failingBuilds = master.number.values.tolist() + master['inducing'] = master.number.apply(lambda x: str(int(x) - 1) not in failingBuilds) + bugInducingBuilds = master[master.inducing == True] + bugInducingBuilds['number'] = bugInducingBuilds.number.apply(lambda x: str(x).zfill(5)) + bugInducingBuilds.sort_values(by='number', inplace=True) + + pushes = bugInducingBuilds[bugInducingBuilds.event_type == 'push'] + + DATASET_PATH = os.environ["REPO_PATH"] + def checkStat(x, dsName): + # repo = '/Users/anil.koyuncu/projects/linux' + cmd = 'git -C ' + join(DATASET_PATH, dsName) + ' show ' + x + " --pretty=\"format:\" --stat -M100%" + + out, err = shellGitCheckout(cmd, enc='latin1') + log = {} + lines = out.strip().split('\n') + for line in lines[:-1]: + fname,ftype= line.split('|') + fname = fname.strip() + ftype= ftype.strip() + if not (ftype == '0' or ftype == 'Bin'): + + ftypes = set(ftype.split(' ')[1]) + if len(ftypes) == 2: + ftype = 'M' + else: + if list(ftypes)[0] == '+': + ftype = 'A' + else: + ftype = 'D' + + log[fname] = ftype + log + df = pd.DataFrame(data=[[log, x]], columns=['files', 'commit']) + return df + + from common.commons import parallelRunMergeNew + + gitrepo = 'kamailio' + # checkStat('06e06f026d14e7d61f2ceb14445a683abbbf91bb', gitrepo) + m = parallelRunMergeNew(checkStat, pushes['commit'].values.tolist(), gitrepo) + res = pd.merge(pushes, m, on=['commit']) + #remove empty commits + res = res[res.files != {}] + # c changes + res = res[res.files.apply(lambda x: np.any([i.endswith('.c') for i in x.keys()]))] + + def getJobs(x): + import requests + resp = requests.get('https://api.travis-ci.org/repos/kamailio/kamailio/builds/'+str(x)) + if resp.status_code != 200: + # This means something went wrong. + raise Exception('GET /tasks/ {}'.format(resp.status_code)) + result = resp.json() + + return result + + res['buildDetails'] = res['id'].apply(lambda x:getJobs(x)) + save_zipped_pickle(res, join(DATA_PATH, 'buildDFprocessed.pickle')) + res + + def getMatrix(x): + if len(x) == 1: + return False + else: + for jobs in x: + if('arch' in jobs['config']): + if (jobs['config']['arch'] == 'amd64' and jobs['result'] == 1): + return True + res['goodBuild'] = res.buildDetails.apply(lambda x:getMatrix(x['matrix'])) + + f = '/Users/anil.koyuncu/projects/datasets/kamailio' + gitrepo = 'kamailio' + + if isfile(join(DATA_PATH,gitrepo + '.df')): + res = load_zipped_pickle(join(DATA_PATH,gitrepo + '.df')) + else: + from commitCollector import makeDF + + # + # cmd = 'git -C '+f +" log --no-merges --pretty=format:'{\"commit\":\"%H\",\"commitDate\":\"%ci\",\"title\":\"%f\",\"committer\":\"%ce\"}' --follow -- .travis.yml > "+join(DATA_PATH,gitrepo+'.travis') + # output = shellCallTemplate(cmd,enc='latin1') + # + # commits = makeDF(join(DATA_PATH,gitrepo+'.travis')) + + from otherDatasets import checkCommitLog + + + # cmd = 'git log --no-merges --pretty=format:'{"commit":"%H","commitDate":"%ci","title":"%f","committer":"%ce"}' > ../tcl.commits' + cmd = 'git -C ' + f + " log --simplify-merges --date-order -m --pretty=format:'{\"commit\":\"%H\",\"commitDate\":\"%ci\",\"title\":\"%f\",\"committer\":\"%ce\"}' > " + join(DATA_PATH,gitrepo + '.commits') + output = shellCallTemplate(cmd,enc='latin1') + + commits = makeDF(join(DATA_PATH,gitrepo + '.commits')) + from common.commons import parallelRunMergeNew + m = parallelRunMergeNew(checkCommitLog, commits['commit'].values.tolist(), gitrepo) + res = pd.merge(commits, m, on=['commit']) + # from common.commons import save_zipped_pickle + # save_zipped_pickle(res,join(DATA_PATH,gitrepo + '.df')) + + + def getRevParse(x): + cmd = 'git -C ' + f + ' rev-parse ' + x + '^@' + out, err = shellGitCheckout(cmd, enc='latin1') + rev = out.strip().split('\n') + # if len(rev) != 1: + # print('error') + + df = pd.DataFrame(data=[[rev, x]], columns=['parent', 'commit']) + return df + + + m = parallelRunMergeNew(getRevParse, res['commit'].values.tolist()) + res1 = pd.merge(res, m, on=['commit']) + save_zipped_pickle(res1, join(DATA_PATH, gitrepo + '.df')) + + # workList = [] + # for i,commit in enumerate(res['commit'].values.tolist()): + # os.makedirs(join(DATA_PATH, 'tcl_patches',str(i).zfill(5) +'_'+commit )) + # cmd = 'git -C ' + f + ' format-patch -M100% --text --full-index --binary -n '+ commit+'^..'+commit +' -o ' + join(DATA_PATH, 'tcl_patches',str(i).zfill(5) +'_'+commit) + # workList.append(cmd) + # + # parallelRun(shellGitCheckout,workList) + + + # patches = listdir('/Users/anil.koyuncu/projects/datasets/test7') + + travisChanges = res[res.files.apply(lambda x: np.any([i == '.travis.yml' for i in x.keys()]))] + travisCommits = travisChanges.commit.values.tolist() + # for travis in travisCommits: + # cmd = 'git -C ' + f + ' checkout -f '+ travis + # o, e = shellGitCheckout(cmd) + # os.makedirs(join(DATA_PATH, 'travis_commits', travis), exist_ok=True) + # o,e + # print(o) + # print(e) + # cmd = 'cp '+ join(f,'.travis.yml') +" "+ join(DATA_PATH, 'travis_commits', travis) + # o, e = shellGitCheckout(cmd) + # o,e + # newContent = '' + # with open(join(DATA_PATH, 'travis_commits', travis,'.travis.yml'),'r') as file: + # travisFile = file.read() + # regex = r"include:(.*)before_install:" + # matches = re.finditer(regex, travisFile, re.MULTILINE|re.DOTALL) + # + # for matchNum, match in enumerate(matches, start=1): + # grs = match.groups() + # if len(re.findall('- name:', match.group(1))) > 0: + # newContent = travisFile.replace(grs[0], + # '\n - name: "Linux/GCC/Shared"\n os: linux\n dist: bionic\n compiler: gcc\n env:\n - BUILD_DIR=unix\n') + # else: + # newContent = travisFile.replace(grs[0],'\n - os: linux\n dist: xenial\n compiler: clang\n env:\n - BUILD_DIR=unix\n') + # + # with open(join(DATA_PATH, 'travis_commits', travis,'.travis.yml'),'w') as file: + # file.write(newContent) + + + res = res.head(res[res.commit == '1cb8068b970358e7b0f47935a2636ca6ae8eceb6'].iloc[0].name) + res.sort_index(inplace=True, ascending=False) + # res = res[res.parent.apply(lambda x: len(x) == 1)] + commits = res.commit.values.tolist() + + branchName = 'test' + + for commit in commits: + print(commit) + cmd = 'git -C ' + f + ' checkout -f '+ commit + o, e = shellGitCheckout(cmd) + o,e + print(e) + + if not isfile(join(f, '.travis.yml')): + continue + + + # if commit in travisCommits: + # cmd = 'git -C ' + f + ' pull -Xtheirs origin ' + branchName + # o, e = shellGitCheckout(cmd) + # o,e + # print(e) + # + # cmd = 'cp ' + join(DATA_PATH, 'travis_commits', commit,'.travis.yml')+ " " + f + # o, e = shellGitCheckout(cmd) + # o,e + # print(e) + # cmd = 'git -C ' + f + ' commit -a -m ' + "'travis'" + # o, e = shellGitCheckout(cmd) + # o,e + # print(e) + # else: + # + # cmd = 'git -C ' + f + ' pull -Xtheirs origin ' + branchName + # o, e = shellGitCheckout(cmd) + # o,e + # print(e) + + cmd = 'git -C ' + f + ' push origin HEAD:'+branchName + o, e = shellGitCheckout(cmd) + o,e + print(e) + if e.startswith('error'): + print(e) + time.sleep(10) + + # ######export patches + # res['path'] = res[['commit','parent']].apply(lambda x: [(i,x['commit']) for i in x['parent']] ,axis=1) + # paths = (list(itertools.chain.from_iterable(res.path.values.tolist()))) + # + # + # # res['pairs'] = res.apply(lambda x: (x['commit'], x['parent']) if x['parent'] != None else None, axis=1) + # # res['tuples'] = res.pairs.apply(lambda x: tuple(x)) + # # col_combi = res.tuples.values.tolist() + # import networkx + # # # + # g = networkx.Graph(paths) + # cluster = [] + # for subgraph in networkx.connected_component_subgraphs(g): + # logging.info('Cluster size %d', len(subgraph.nodes())) + # cluster.append(subgraph.nodes()) + # cluster + # # p = networkx.shortest_path(g) + # # p + # + # parentDictionary= dict(networkx.dfs_successors(g, source='')) + # workList = [] + # for i,c in enumerate(list(pairwise(list(networkx.dfs_preorder_nodes(g, source=''))))): + # parent,commit = c + # if parent =='': + # continue + # os.makedirs(join(DATA_PATH, 'tcl_patches_',str(i).zfill(5) +'_'+parent ),exist_ok=True) + # + # cmd = 'git -C ' + f + ' format-patch -M100% --text --full-index --binary -n '+ parent+'..'+commit +' -o ' + join(DATA_PATH, 'tcl_patches_',str(i).zfill(5) +'_'+parent) + # workList.append(cmd) + # + # parallelRun(shellGitCheckout,workList) + # + # ########end export patches + # + # # import matplotlib.pyplot as plt + # # networkx.draw(g, with_labels=True, font_weight='bold') + # # plt.subplot(122) + # # cluster.sort(key=len, reverse=True) + # # connectedCommits = [i for i in cluster[0]] #10280 + # # res['connected']=res.commit.apply(lambda x: x in connectedCommits) + # # + # # selectedPatches = res[['commit','rowIndex', 'parent', 'connected', 'parentPos']] + # # selectedPatches.sort_values(by='parentPos', ascending=False, inplace=True) + # # selectedPatches[selectedPatches['connected'] == True] + # + # + # # + # # # parentDict = res[['parent']].to_dict() + # # # pDict = {value: key for key, value in parentDict['parent'].items()} + # # # res['hasParent'] = res.commit.apply(lambda x: x in pDict) + # # ## find good commits + # # # orderDict = res[['commit']].to_dict() + # # # + # # # dict = {value: key for key, value in orderDict['commit'].items()} + # # # + # # # res['parentPos'] =res.parent.apply(lambda x: dict[x] if x in dict else None) + # # # + # # # + # # # + # # # + # # # def rowIndex(row): + # # # return row.name + # # # + # # # + # # # res['rowIndex'] = res.apply(rowIndex, axis=1) + # # # + # # # save_zipped_pickle(res, join(DATA_PATH, gitrepo + '.df')) + # # ## end find good commits + # # travisChanges = res[res.files.apply(lambda x: np.any([i == '.travis.yml' for i in x.keys()]))] + # # travisCommits = travisChanges.commit.values.tolist() + # # + # # + # # for i, commit in zip(selectedPatches['rowIndex'].values.tolist(),selectedPatches['commit'].values.tolist()): + # # print(i,commit) + # # if commit in travisCommits: + # # print() + # # patch = str(i).zfill(5) + "_" + commit + # # p = listdir(join(DATA_PATH, 'tcl_patches', patch)) + # # if(len(p) == 0): + # # continue + # # if(len(p) > 1): + # # print('error') + # # cmd = 'git -C ' + f + ' am -q -3 --whitespace=nowarn ' + join(DATA_PATH, 'tcl_patches', patch, p[0]) + # # o,e= shellGitCheckout(cmd) + # # # print(o) + # # # print(e) + # # + # # if len(e)> 0: + # # print(patch) + # # + # # cmd = 'git -C ' + f + ' push' + # # o, e = shellGitCheckout(cmd) + # # o,e + # + # patches = listdir(join(DATA_PATH, 'tcl_patches_')) + # patches.sort() + # + # for patch in patches: + # if patch == '.DS_Store': + # continue + # # if patch.split('_')[1] in travisCommits: + # # print() + # p = listdir(join(DATA_PATH, 'tcl_patches_', patch)) + # + # if(len(p) == 0): + # continue + # p.sort() + # if(len(p) > 1): + # print('error') + # + # # if(p[0] == '0001-Initial-revision.patch'): + # # continue + # for actualPatch in p: + # cmd = 'git -C ' + f + ' am -q -3 --whitespace=nowarn ' + join(DATA_PATH, 'tcl_patches_', patch, actualPatch) + # # cmd = 'git -C ' + f + ' am -q -3 --whitespace=nowarn ' + join('/Users/anil.koyuncu/projects/datasets/test7', patch) + # o,e= shellGitCheckout(cmd) + # # print(o) + # # print(e) + # + # if len(e)> 0: + # print(patch) + # + # # cmd = 'git -C ' + f + ' push' + # # o, e = shellGitCheckout(cmd) + # # o,e + + + + # for i,v in zip(travisChanges.commit.index.tolist(),travisChanges.commit.values.tolist()): + # i,v + + + # ''' + # git checkout master + # git branch -D start + # git checkout -f -b start c6a259aeeca4814a97cf6694814c63e74e4e18fa + # git checkout -f -b start 1cb8068b970358e7b0f47935a2636ca6ae8eceb6 + # git gc --prune=now + # git push --set-upstream origin startTravis + + #563 from test branch start + # ''' else: logging.error('Unknown job %s',job) except Exception as e: diff --git a/python/otherDatasets.py b/python/otherDatasets.py index 1aecb49..8ca472d 100644 --- a/python/otherDatasets.py +++ b/python/otherDatasets.py @@ -19,6 +19,7 @@ def checkoutFiles(sha,shaOld, filePath,type, repo=None): folderDiff = join(type, 'DiffEntries') folderPrev = join(type, 'prevFiles') folderRev = join( type, 'revFiles') + folderPatch = join( type, 'patches') if not os.path.exists(folderDiff): os.mkdir(folderDiff) @@ -27,14 +28,15 @@ def checkoutFiles(sha,shaOld, filePath,type, repo=None): if not os.path.exists(folderRev): os.mkdir(folderRev) - + if not os.path.exists(folderPatch): + os.mkdir(folderPatch) # if repo is None: # repo = join(REPO_PATH,repoName) savePath = filePath.replace('/','#') - if not isfile(folderDiff + '/' + sha + '_' + shaOld + '_' + savePath + '.txt'): + if not isfile(folderDiff + '/' + shaOld + '_' + sha + '_' + savePath + '.txt'): cmd = 'git -C ' + repo + ' diff -U ' + shaOld + ':' + filePath + '..' + sha + ':' + filePath # + '> ' + folderDiff + '/' + sha + '_' + shaOld + '_' + savePath.replace('.java','.txt') @@ -52,21 +54,28 @@ def checkoutFiles(sha,shaOld, filePath,type, repo=None): numberOfHunks = re.findall('@@\s\-\d+,*\d*\s\+\d+,*\d*\s@@', matched) if len(numberOfHunks) == 0: return - diffFile = shaOld + '\n' + matched.replace(' @@ ', ' @@\n') + # diffFile = shaOld + '\n' + matched.replace(' @@ ', ' @@\n') + diffFile = matched.replace(' @@ ', ' @@\n') - with open(folderDiff + '/' + sha + '_' + shaOld + '_' + savePath + '.txt', + with open(folderDiff + '/' + shaOld + '_' + sha + '_' + savePath + '.txt', 'w') as writeFile: writeFile.writelines(diffFile) + # cmd = 'git -C ' + repo + ' format-patch -M100% --text --full-index --binary -n ' + shaOld + '..' + sha + ' -o ' + join( + # folderPatch, sha +'_'+str(len(numberOfHunks))+'_' +'.patch') + cmd = 'git -C ' + repo + ' diff -U --patch ' + shaOld + ':' + filePath + '..' + sha + ':' + filePath + ' > ' + join( + folderPatch, shaOld + '_' + sha + '_' + savePath + '.patch' + '_'+str(len(numberOfHunks))) + + o, errors = shellGitCheckout(cmd, enc='latin1') - cmd = 'git -C ' + repo + ' show ' + sha + ':' + filePath + '> ' + folderRev + '/' + sha + '_' + shaOld + '_' +savePath + cmd = 'git -C ' + repo + ' show ' + sha + ':' + filePath + '> ' + folderRev + '/' + shaOld + '_' + sha + '_' +savePath if errors: # print(errors) raise FileNotFoundError o,errors= shellGitCheckout(cmd,enc='latin1') - cmd = 'git -C ' + repo + ' show ' + shaOld + ':' + filePath + '> ' + folderPrev + '/' + 'prev_'+sha + '_' + shaOld + '_' +savePath + cmd = 'git -C ' + repo + ' show ' + shaOld + ':' + filePath + '> ' + folderPrev + '/' + 'prev_'+shaOld + '_' + sha + '_' +savePath if errors: # print(errors) raise FileNotFoundError @@ -77,12 +86,12 @@ def checkoutFiles(sha,shaOld, filePath,type, repo=None): raise FileNotFoundError except FileNotFoundError as fnfe: - if isfile(folderRev + '/' + sha + '_' + shaOld + '_' +savePath): - os.remove(folderRev + '/' + sha + '_' + shaOld + '_' +savePath) - if isfile(folderPrev + '/' + 'prev_'+sha + '_' + shaOld + '_' +savePath): - os.remove(folderPrev + '/' + 'prev_'+sha + '_' + shaOld + '_' +savePath) - if isfile(folderDiff + '/' + sha + '_' + shaOld + '_' + savePath.replace('.java','.txt')): - os.remove(folderDiff + '/' + sha + '_' + shaOld + '_' + savePath.replace('.java','.txt')) + if isfile(folderRev + '/' + shaOld + '_' + sha + '_' +savePath): + os.remove(folderRev + '/' + shaOld + '_' + sha + '_' +savePath) + if isfile(folderPrev + '/' + 'prev_'+shaOld + '_' + sha + '_' +savePath): + os.remove(folderPrev + '/' + 'prev_'+shaOld + '_' + sha + '_' +savePath) + if isfile(folderDiff + '/' + shaOld + '_' + sha + '_' + savePath.replace('.java','.txt')): + os.remove(folderDiff + '/' + shaOld + '_' + sha + '_' + savePath.replace('.java','.txt')) # print(fnfe) # raise Exception(fnfe) except Exception as e: @@ -283,6 +292,8 @@ def core(): print(repo) cmd = 'git config --global http.postBuffer 157286400' shellCallTemplate(cmd) + cmd = 'git config --global diff.renamelimit 0' + shellCallTemplate(cmd) cmd = 'git -C ' + DATASET_PATH + ' clone ' + src shellCallTemplate(cmd) logging.info(repo) diff --git a/python/patchManyBugs.py b/python/patchManyBugs.py index 6486c14..04bfe6a 100644 --- a/python/patchManyBugs.py +++ b/python/patchManyBugs.py @@ -118,10 +118,10 @@ def patchCore(): manybugs = [i for i in manybugs if i in sosbugs] spfiles = listdir(join(DATASET,'cocci')) - from sprinferIndex import filterPatterns - filteredPattern = filterPatterns() + # from sprinferIndex import filterPatterns + # filteredPattern = filterPatterns() - spfiles = [i for i in spfiles if i in filteredPattern] + # spfiles = [i for i in spfiles if i in filteredPattern] workList = [] for manybug in manybugs: if manybug == '.DS_Store': diff --git a/python/patch_validate.py b/python/patch_validate.py index 708597a..71cc702 100755 --- a/python/patch_validate.py +++ b/python/patch_validate.py @@ -285,7 +285,7 @@ def patch_validate(): if b== '.DS_Store': continue t = b, port - if (b.startswith('manybugs:php:')): + if (b.startswith('manybugs:')): bugList.append(t) if port == 6300: port = 6000 @@ -300,6 +300,7 @@ def patch_validate(): # parallelRun(findTests,bugList,max_workers=1) results = parallelRunMerge(validateCore, bugList) # # + print(results) with open(join(DATA_PATH, 'mayBugsValidateNew'), 'w', encoding='utf-8') as writeFile: # if levelPatch == 0: diff --git a/python/sprinferIndex.py b/python/sprinferIndex.py index 8544ea6..e725562 100644 --- a/python/sprinferIndex.py +++ b/python/sprinferIndex.py @@ -33,7 +33,7 @@ def indexCore(): # singleHunkedFiles = sDF.fileName.unique().tolist() # singleHunkedFiles = [i.replace('.txt', '') for i in singleHunkedFiles] - clusterPath = join(DATA_PATH, 'shapes') + clusterPath = join(DATA_PATH, 'actions') roots = listdir(clusterPath) roots = [i for i in roots if not (i.startswith('.') or i.endswith('.pickle'))] @@ -186,32 +186,48 @@ def divideCoccis(): iFile.writelines(idx[t[1]:]) os.remove(join(SPINFER_INDEX_PATH, 'cocci', cocci)) +def getFreqPatterns(): + patterns = load_zipped_pickle(join(DATA_PATH, 'allCocciPatterns.pickle')) + freqs = patterns.pattern.value_counts().to_dict() + + allPatterns = patterns.cid.values.tolist() + uniquePatterns = patterns.drop_duplicates(subset=['pattern']).cid.values.tolist() + + uniquePatterns = patterns[patterns.cid.isin(uniquePatterns)] + + uniquePatterns['newFreq'] = uniquePatterns.pattern.apply(lambda x: freqs[x]) + + re.search(r"// Recall:(.*), Precision:(.*), Matching recall:(.*)") + + def removeDuplicates(): commentPattern = r"(/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/)|(//.*)" coccis =os.listdir(join(SPINFER_INDEX_PATH, 'cocci')) - cocciPatterns = pd.DataFrame(columns=['cid', 'pattern','inferedFrom']) + cocciPatterns = pd.DataFrame(columns=['cid', 'pattern','inferedFrom','recall','precision','matchingRecall']) ind = 0 for cocci in coccis: with open(join(SPINFER_INDEX_PATH, 'cocci', cocci), 'r') as iFile: idx = iFile.read() idx inferedFrom = re.search(r"// Infered from:(.*)\n",idx).groups() + recall,precision, matchingRecall = re.search(r"// Recall:(.*), Precision:(.*), Matching recall:(.*)",idx).groups() pattern = re.sub(commentPattern, '', idx, re.DOTALL) - cocciPatterns.loc[ind] = [cocci,pattern,inferedFrom] + cocciPatterns.loc[ind] = [cocci,pattern,inferedFrom,recall.strip(),precision.strip(),matchingRecall.strip()] ind = ind +1 cocciPatterns['iFiles'] = cocciPatterns.inferedFrom.apply(lambda x: getInferred(x[0])) cocciPatterns['freq'] = cocciPatterns.iFiles.apply(lambda x: len(x)) cocciPatterns['project'] = cocciPatterns.iFiles.apply(lambda x: list(set([i.split('/{')[0].replace('(','') for i in x]))) cocciPatterns.sort_values(by='freq', inplace=True, ascending=False) - save_zipped_pickle(cocciPatterns,join(DATA_PATH,'allCocciPatterns.pickle')) - allPatterns = cocciPatterns.cid.values.tolist() - uniquePatterns = cocciPatterns.drop_duplicates(subset=['pattern']).cid.values.tolist() - toRemove = list(set(allPatterns).difference(uniquePatterns)) - print(toRemove) - for p in toRemove: - os.remove(join(SPINFER_INDEX_PATH, 'cocci', p)) - print(len(uniquePatterns)) + # save_zipped_pickle(cocciPatterns,join(DATA_PATH,'allCocciPatterns.pickle')) + save_zipped_pickle(cocciPatterns,join(DATA_PATH,'allCocciPatterns2.pickle')) + # allPatterns = cocciPatterns.cid.values.tolist() + # uniquePatterns = cocciPatterns.drop_duplicates(subset=['pattern']).cid.values.tolist() + # toRemove = list(set(allPatterns).difference(uniquePatterns)) + # print(toRemove) + # for p in toRemove: + # os.remove(join(SPINFER_INDEX_PATH, 'cocci', p)) + # print(len(uniquePatterns)) def filterPatterns(): @@ -322,6 +338,8 @@ def patchCoreIntro(): filterList =[] for manybug in manybugs: + if manybug == '.DS_Store': + continue # files = listdir(join(join(DATA_PATH,"manybugs",manybug,'diffs'))) if os.path.exists(join(DATA_PATH, "introclass", manybug, 'patches')): shutil.rmtree(join(DATA_PATH, "introclass", manybug, 'patches')) diff --git a/python/stats.py b/python/stats.py index b2f11e3..62df327 100644 --- a/python/stats.py +++ b/python/stats.py @@ -2,6 +2,8 @@ from common.commons import * DATA_PATH = os.environ["DATA_PATH"] PROJECT_TYPE = os.environ["PROJECT_TYPE"] REDIS_PORT = os.environ["REDIS_PORT"] +ROOT = os.environ["ROOT_DIR"] +INNER_DATA_PATH = join(ROOT,'data') def statsNormal(isFixminer=True): # tokens = join(DATA_PATH, 'tokens') @@ -226,6 +228,7 @@ def stats(type,isFixminer=True): if shape.startswith('.'): continue sizes = listdir(join(shapesPath, shape)) + logging.debug(shape + ":" + str(len(sizes))) for size in sizes: if size.startswith('.'): @@ -453,11 +456,16 @@ javaAst = ["AnonymousClassDeclaration", "ArrayAccess", "ArrayCreation", "ArrayIn "LambdaExpression", "IntersectionType", "NameQualifiedType", "CreationReference", "ExpressionMethodReference", "SuperMethodReference", "TypeMethodReference", "MethodName", "Operator", "New", "Instanceof"] -cAst = ["unit","comment","literal","operator","modifier","name","type","condition","block","index","decltype","typename","atomic","assert","generic_selection","selector","association_list","association","expr_stmt","expr","decl_stmt","decl","init","range","break","continue","goto","label","typedef","asm","macro","enum","enum_decl","if","ternary","then","else","elseif","while","typeof","do","switch","case","default","for","foreach","control","incr","function","function_decl","lambda","specifier","return","call","sizeof","parameter_list","parameter","krparameter_list","krparameter","argument_list","argument","capture","struct","struct_decl","union","union_decl","class","class_decl","public","private","protected","signals","forever","emit","member_init_list","constructor","constructor_decl","destructor","destructor_decl","super","friend","extern","namespace","using","try","catch","finally","throw","throws","noexcept","template","directive","file","number","include","define","undef","line","ifdef","ifndef","elif","endif","pragma","error","warning","value","empty","region","endregion","import","marker","parse","mode","lock","fixed","checked","unchecked","unsafe","using_stmt","delegate","event","constraint","extends","implements","package","synchronized","interface","interface_decl","annotation_defn","static","attribute","target","linq","from","select","where","let","orderby","group","join","in","on","equals","by","into","escape","annotation","alignas","alignof","typeid","ref_qualifier","receiver","message","protocol_list","category","protocol","required","optional","property","attribute_list","synthesize","dynamic","encode","autoreleasepool","compatibility_alias","protocol_decl","cast","position","clause","empty_stmt"] - +# cAst = ["unit","comment","literal","operator","modifier","name","type","condition","block","index","decltype","typename","atomic","assert","generic_selection","selector","association_list","association","expr_stmt","expr","decl_stmt","decl","init","range","break","continue","goto","label","typedef","asm","macro","enum","enum_decl","if","ternary","then","else","elseif","while","typeof","do","switch","case","default","for","foreach","control","incr","function","function_decl","lambda","specifier","return","call","sizeof","parameter_list","parameter","krparameter_list","krparameter","argument_list","argument","capture","struct","struct_decl","union","union_decl","class","class_decl","public","private","protected","signals","forever","emit","member_init_list","constructor","constructor_decl","destructor","destructor_decl","super","friend","extern","namespace","using","try","catch","finally","throw","throws","noexcept","template","directive","file","number","include","define","undef","line","ifdef","ifndef","elif","endif","pragma","error","warning","value","empty","region","endregion","import","marker","parse","mode","lock","fixed","checked","unchecked","unsafe","using_stmt","delegate","event","constraint","extends","implements","package","synchronized","interface","interface_decl","annotation_defn","static","attribute","target","linq","from","select","where","let","orderby","group","join","in","on","equals","by","into","escape","annotation","alignas","alignof","typeid","ref_qualifier","receiver","message","protocol_list","category","protocol","required","optional","property","attribute_list","synthesize","dynamic","encode","autoreleasepool","compatibility_alias","protocol_decl","cast","position","clause","empty_stmt"] +cAst = ["unit" ,"comment:block", "comment:line", "literal:string", "literal:char", "literal:number", "literal:boolean", "literal:null", "literal:complex", "operator", "modifier", "name", "type", "type:prev", "block", "block_content", "block:pseudo", "index", "decltype", "typename", "atomic", "assert:static", "generic_selection", "selector", "association_list", "association", "expr_stmt", "expr", "decl_stmt", "decl", "init", "range", "break", "continue", "goto", "label", "typedef", "asm", "macro", "enum", "enum_decl", "if_stmt", "if", "ternary", "then", "else", "if:elseif", "while", "typeof", "do", "switch", "case", "default", "for", "foreach", "control", "condition", "incr", "function", "function_decl", "lambda", "specifier", "return", "call", "sizeof", "parameter_list", "parameter", "krparameter_list", "krparameter", "argument_list", "argument", "capture", "parameter_list:pseudo", "parameter_list:indexer", "struct", "struct_decl", "union", "union_decl", "class", "class_decl", "public", "public:default", "private", "private:default", "protected", "protected:default", "signals", "forever", "emit", "member_init_list", "constructor", "constructor_decl", "destructor", "destructor_decl", "super_list", "super", "friend", "extern", "namespace", "using", "try", "catch", "finally", "throw", "throws", "noexcept", "template", "argument_list:generic", "parameter_list:generic", "directive", "file", "number", "literal", "include", "define", "undef", "line", "cpp:if", "ifdef", "ifndef", "cpp:else", "elif", "endif", "cpp:then", "pragma", "error", "warning", "value", "empty", "marker", "region", "endregion", "import", "parse", "mode", "lock", "fixed", "checked", "unchecked", "unsafe", "using_stmt", "delegate", "event", "constraint", "extends", "implements", "package", "assert", "synchronized", "interface", "interface_decl", "annotation_defn", "static", "attribute", "target", "linq", "from", "select", "where", "let", "orderby", "group", "join", "in", "on", "equals", "by", "into", "escape", "annotation", "alignas", "alignof", "typeid", "sizeof:pack", "enum:class", "enum_decl:class", "function:operator", "function_decl:operator", "ref_qualifier", "receiver", "message", "protocol_list", "category", "clause"] def exportAbstractPatterns(): + dbDir = join(INNER_DATA_PATH, 'redis') + + portInner = REDIS_PORT + startDB(dbDir, portInner, PROJECT_TYPE) clusterStats,df = stats('actions') + logging.debug(len(clusterStats)) port = REDIS_PORT import redis redis_db = redis.StrictRedis(host="localhost", port=port, db=0) diff --git a/python/test_patched_file.py b/python/test_patched_file.py index e916e0e..50b826a 100755 --- a/python/test_patched_file.py +++ b/python/test_patched_file.py @@ -5,13 +5,14 @@ import signal # from common.commons import shellGitCheckout DATA_PATH = os.environ["DATA_PATH"] +ROOT_DIR = os.environ["ROOT_DIR"] introClassFile = join(DATA_PATH,'introClassData.txt') def testCore(t): bugName, port = t container = None # with bugzoo.server.ephemeral(port=port, verbose=False,bugzooPath="/Users/anil.koyuncu/anaconda3/envs/python36/bin/bugzood", timeout_connection=3000) as client: - cmd = 'bash {} {}'.format(join(DATA_PATH,'startBugzoo.sh'),port) + cmd = 'bash {} {}'.format(join(ROOT_DIR,'data','startBugzoo.sh'),port) with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) as process: diff --git a/python/validate_manybugs.py b/python/validate_manybugs.py index c430a48..1358179 100644 --- a/python/validate_manybugs.py +++ b/python/validate_manybugs.py @@ -5,6 +5,7 @@ import os from common.commons import * import signal DATA_PATH = os.environ["DATA_PATH"] +ROOT_DIR = os.environ["ROOT_DIR"] BUGDIR = join(DATA_PATH,'manybugs') PATCHES_DIR = join(DATA_PATH,'manybugs_sos') @@ -17,7 +18,8 @@ def patch_validate(t): # bug = client.bugs['introclass:checksum:08c7ea:006'] # client,process = getClient(port) container = None - cmd = 'bash {} {}'.format(join(DATA_PATH, 'startBugzoo.sh'), port) + + cmd = 'bash {} {}'.format(join(ROOT_DIR, 'data', 'startBugzoo.sh'), port) with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) as process: # o,e = shellGitCheckout(cmd)