info ( "Skipping the analysis of %s file" % p ) continue sf = json2sframe ( p ) sf. info ( "Analyzing of %s " % p ) outp = sframes_dir + os. join ( root, filename )) return matches #Creating all SFrames for p in match_files_in_dir ( basedir, "*.bz2" ): logging. walk ( basedir ): for filename in fnmatch. close () return outpath def match_files_in_dir ( basedir, ext ): """ Find all files in the basedir with 'ext' as filename extension :param basedir: input basedir :param ext: filename extension :return: list of file paths with the input extension """ matches = for root, dirnames, filenames in os. BZ2File ( inpath, 'rb' ) for data in iter ( lambda : in_file. info ( "Decompressing file %s to %s " % ( inpath, outpath )) in_file = bz2. basename ( inpath ) + ".decompressed" out_file = file ( outpath, 'wb' ) logging. fromtimestamp ( float ( utc ))) return sf def decompress_bz2 ( inpath, outpath = None ): """ Decompress bz2 to the outpath, if the outpath is not provided then decompress the file to the inpath directory :param inpath: decompress bz2 file to the outpath :param outpath: output path for the decompress file :return: the output file path """ if outpath is None : outpath = tmp_dir + os. remove ( dpath ) #add datetime information sf = get_month_from_path ( path ) sf = get_year_from_path ( path ) sf = sf. read_json ( dpath, orient = "lines" ) #remove the decompressed file os.
![arrival movie torrent reddit arrival movie torrent reddit](https://www.opuradio.com/u_file/1801/photo/299348dab4.jpg)
read_json ( path, orient = "lines" ) else : dpath = decompress_bz2 ( path ) sf = gl. The SFrame also contains information regarding each post date & time :rtype: gl.SFrame """ if not path.
![arrival movie torrent reddit arrival movie torrent reddit](https://www.opuradio.com/u_file/1801/photo/fb0c78cb19.jpg)
:return: SFrame object created from the file in the input path. The file can also be compressed in bz2 format.
![arrival movie torrent reddit arrival movie torrent reddit](https://cdn.vox-cdn.com/thumbor/euLlR_-km2zWrggvPftvzWvnN_Q=/0x0:6144x4080/1200x800/filters:focal(2581x1549:3563x2531)/cdn.vox-cdn.com/uploads/chorus_image/image/52766567/La_La_Let_s_Go_To_The_Movies.0.jpg)
split ( "-" )) def json2sframe ( path ): """ Creates an SFrame object from the file in the input path :param path: path to a file that contains a list of JSON objects, each JSON is saved in a separate line. split ( "-" )) def get_year_from_path ( path ): y = os. set_runtime_config ( 'GRAPHLAB_CACHE_FILE_LOCATIONS', '/mnt/tmp' ) base_dir = "/mnt/data/reddit" # Replace this with the directory which you downloaded the file into sframes_dir = base_dir + "/sframes/" # Replace this with the directory you want to save the SFrame to tmp_dir = "/mnt/tmp" # Replace this with the directory you want to save the SFrame to def get_month_from_path ( path ): m = os. Import os import logging import bz2 from datetime import datetime import graphlab as gl import graphlab.aggregate as agg import fnmatch gl.