#!/usr/bin/python
import filecmp
import os


# Compare two files
def compareTwoFiles(file1='server.py', file2='server2.py'):
    '''Print the stat record of two files and whether their contents match.

    file1, file2 -- paths of the files to compare; the defaults are the
                    file names the original snippet used.

    Prints os.stat() of each file followed by the comparison result, and
    also returns that result (True when the contents are identical).
    Raises OSError if either file cannot be stat'ed.
    '''
    print(os.stat(file1))
    print(os.stat(file2))
    # shallow=False forces a byte-by-byte comparison instead of trusting
    # matching os.stat() signatures (type, size, mtime).
    identical = filecmp.cmp(file1, file2, shallow=False)
    print(identical)
    return identical


if __name__ == '__main__':
    compareTwoFiles()
# Compare the assignments submitted by students
# Compare all files from one student with all files from another student
#!/usr/bin/python
import filecmp
import itertools
import os
import sys


def getFiles(d):
    '''Get all files in a folder

    Walks d recursively and returns a sorted list of paths of the regular
    files whose name does not start with a dot.
    '''
    files = []
    for dirpath, _dirnames, filenames in os.walk(d):
        for f in filenames:
            path = os.path.join(dirpath, f)
            # os.walk also lists broken symlinks and special files; a
            # broken symlink makes filecmp.cmp raise, so keep only
            # regular files.
            if not f.startswith('.') and os.path.isfile(path):
                files.append(path)
    # os.walk order is OS dependent; sort so the report is reproducible.
    files.sort()
    return files


def getDir(d):
    '''Get all directories in a folder

    Returns a sorted list of the paths of the immediate sub-directories
    of d (not recursive).
    '''
    paths = [os.path.join(d, e) for e in sorted(os.listdir(d))]
    return [p for p in paths if os.path.isdir(p)]


def compareFolders(dir1, dir2):
    '''Compare the files in a directory with the files in another directory

    Every file under dir1 is compared byte-by-byte with every file under
    dir2; each identical pair is printed as "path1 path2".
    '''
    files_1 = getFiles(dir1)
    files_2 = getFiles(dir2)
    for f1 in files_1:
        for f2 in files_2:
            if filecmp.cmp(f1, f2, shallow=False):
                # %-formatting prints the same text on Python 2 and 3.
                print('%s %s' % (f1, f2))


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('Usage: python comparison.py dir')
        sys.exit(1)

    # Get all folders
    folders = getDir(sys.argv[1])
    # Compare every unordered pair of folders exactly once.
    for d1, d2 in itertools.combinations(folders, 2):
        compareFolders(d1, d2)
# Compare the assignments submitted by students
# Compare all files from one student with all files from another student,
# and report file pairs that have high similarity
#!/usr/bin/python
import filecmp
import itertools
import os
import sys

# File pairs whose similarity is above this value are reported.
SIMILARITY_THRESHOLD = 0.8


def getFiles(d):
    '''Get all files in a folder

    Walks d recursively and returns a sorted list of paths of the regular
    files whose name does not start with a dot.
    '''
    files = []
    for dirpath, _dirnames, filenames in os.walk(d):
        for f in filenames:
            path = os.path.join(dirpath, f)
            # os.walk also lists broken symlinks and special files, which
            # cannot be read as documents; keep only regular files.
            if not f.startswith('.') and os.path.isfile(path):
                files.append(path)
    # os.walk order is OS dependent; sort so the report is reproducible.
    files.sort()
    return files


def getDir(d):
    '''Get all directories in a folder

    Returns a sorted list of the paths of the immediate sub-directories
    of d (not recursive).
    '''
    paths = [os.path.join(d, e) for e in sorted(os.listdir(d))]
    return [p for p in paths if os.path.isdir(p)]


def compareFiles(f1, f2):
    '''Check the similarity of a pair of files

    Both files are turned into TF-IDF vectors fitted on just these two
    documents; the value returned is the product of the two row vectors.
    Returns 0.0 when no token can be extracted from either file.
    '''
    # Imported here so the directory helpers above stay usable on machines
    # without scikit-learn installed.
    from sklearn.feature_extraction.text import TfidfVectorizer

    # Read raw bytes and let the vectorizer decode them: submissions are
    # not guaranteed to be valid UTF-8, and decode_error='ignore' drops
    # undecodable bytes instead of raising.
    with open(f1, 'rb') as content_file:
        c1 = content_file.read()
    with open(f2, 'rb') as content_file:
        c2 = content_file.read()

    documents = [c1, c2]
    try:
        tfidf = TfidfVectorizer(decode_error='ignore').fit_transform(documents)
    except ValueError:
        # fit_transform raises ValueError for an empty vocabulary (e.g. two
        # empty files); treat that as "not similar" rather than aborting
        # the whole run.
        return 0.0
    # .dot() is an explicit matrix product; entry [0, 1] pairs document 0
    # with document 1.
    pairwise_similarity = tfidf.dot(tfidf.T)
    return pairwise_similarity[0, 1]


def compareFolders(dir1, dir2, threshold=SIMILARITY_THRESHOLD):
    '''Compare the files in a directory with the files in another directory

    Every file under dir1 is scored against every file under dir2 with
    compareFiles(); pairs scoring strictly above threshold are printed as
    "path1 path2 similarity".
    '''
    files_1 = getFiles(dir1)
    files_2 = getFiles(dir2)
    for f1 in files_1:
        for f2 in files_2:
            similarity = compareFiles(f1, f2)
            if similarity > threshold:
                # %-formatting prints the same text on Python 2 and 3.
                print('%s %s %s' % (f1, f2, similarity))


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('Usage: python comparison.py dir')
        sys.exit(1)

    # Get all folders
    folders = getDir(sys.argv[1])
    # Compare every unordered pair of folders exactly once.
    for d1, d2 in itertools.combinations(folders, 2):
        compareFolders(d1, d2)
import filecmp


def printDirComparison(dir_left, dir_right):
    '''Print the attributes of filecmp.dircmp for two directories

    dir_left, dir_right -- paths of the directories to compare.

    Prints seven lists, one per line, in the order shown below. Only the
    top level of each directory is examined.
    '''
    dc = filecmp.dircmp(dir_left, dir_right)
    print(dc.left_list)     # entries in left directory
    print(dc.right_list)    # entries in right directory
    print(dc.left_only)     # entries in left directory only
    print(dc.right_only)    # entries in right directory only
    print(dc.common_files)  # files in both directories
    # dircmp compares shallowly: files whose os.stat() signatures match are
    # taken as equal without reading them; contents are compared only when
    # the signatures differ.
    print(dc.diff_files)    # same file name but different content
    print(dc.same_files)    # same file name and same content